Infrastructure for new group kernels, and new C kernels
authorErik Lindahl <erik@kth.se>
Sun, 28 Oct 2012 18:45:52 +0000 (14:45 -0400)
committerMark Abraham <mark.j.abraham@gmail.com>
Thu, 8 Nov 2012 07:42:10 +0000 (08:42 +0100)
This patch implements a new selection mechanism for nonbonded
kernels, as well as modifications of the neighborsearching to allow
long-range lists to be used every step (for rlist!=rcoul!=rvdw),
which is necessary for PP-PME load balancing. We can also evaluate
long-range interactions at steps that are arbitrary divisors of
nstlist. Kernels are now selected from text strings describing
desired options, which makes it much easier to have different
kernels for different architectures and gradually add kernels for
commonly used cases without implementing all combinations of
features. There is also a new kernel generator and flexible
preprocessor written in python. This is only executed by developers,
and the generated kernels are therefor included in the git tree
after discussions. The actual kernels are now written in plain C
with slightly more advanced preprocessor options allowing loops. The
listed (1,4) nonbonded interactions now use their own code rather
than executing a complete kernel call for each pair, which saves
a lot of overhead. The old c kernels have been adapted to continue
working, but addres kernels are disabled by this patch. Subsequent
patches will start adding back the kernels (one per architecture),
and also the addres kernels. The group kernels now support arbitrary
combinations of switch and shift for most interactions (but the
might be run in the generic kernel), and there are new kernels
specifically designed for Ewald direct space interactions as well as
force-only kernels.

Change-Id: Iab142bcc2e6d914ca127b4caa6a07b604549cb13

380 files changed:
CMakeLists.txt
cmake/gmxTestInline.cmake
cmake/gmxTestRestrict.cmake
include/force.h
include/names.h
include/nonbonded.h
include/ns.h
include/string2.h
include/tables.h
include/types/enums.h
include/types/force_flags.h
include/types/forcerec.h
include/types/idef.h
include/types/inputrec.h
include/types/interaction_const.h
include/types/nblist.h
include/types/nrnb.h
include/types/simple.h
include/vec.h
share/html/online/mdp_opt.html
src/config.h.cmakein
src/gmxlib/bondfree.c
src/gmxlib/names.c
src/gmxlib/nonbonded/CMakeLists.txt
src/gmxlib/nonbonded/nb_free_energy.c
src/gmxlib/nonbonded/nb_free_energy.h
src/gmxlib/nonbonded/nb_generic.c
src/gmxlib/nonbonded/nb_generic.h
src/gmxlib/nonbonded/nb_generic_adress.c
src/gmxlib/nonbonded/nb_generic_cg.c
src/gmxlib/nonbonded/nb_generic_cg.h
src/gmxlib/nonbonded/nb_kernel.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel.h [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel_c_adress.c
src/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel_c_adress.h
src/gmxlib/nonbonded/nb_kernel_bluegene/nb_kernel_bluegene.c
src/gmxlib/nonbonded/nb_kernel_bluegene/nb_kernel_bluegene.h
src/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py [new file with mode: 0755]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel010.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel010.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel020.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel020.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel030.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel030.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel100.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel100.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel101.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel101.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel102.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel102.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel103.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel103.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel104.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel104.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel110.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel110.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel111.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel111.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel112.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel112.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel113.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel113.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel114.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel114.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel120.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel120.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel121.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel121.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel122.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel122.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel123.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel123.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel124.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel124.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel130.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel130.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel131.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel131.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel132.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel132.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel133.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel133.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel134.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel134.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel200.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel200.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel201.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel201.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel202.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel202.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel203.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel203.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel204.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel204.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel210.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel210.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel211.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel211.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel212.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel212.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel213.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel213.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel214.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel214.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel220.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel220.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel221.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel221.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel222.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel222.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel223.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel223.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel224.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel224.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel230.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel230.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel231.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel231.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel232.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel232.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel233.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel233.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel234.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel234.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel300.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel300.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel301.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel301.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel302.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel302.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel303.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel303.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel304.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel304.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel310.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel310.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel311.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel311.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel312.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel312.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel313.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel313.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel314.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel314.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel320.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel320.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel321.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel321.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel322.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel322.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel323.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel323.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel324.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel324.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel330.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel330.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel331.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel331.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel332.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel332.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel333.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel333.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel334.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel334.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel400.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel400.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel410.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel410.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel420.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel420.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel430.c [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel430.h [deleted file]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwLJ_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwNone_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBham_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJ_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomP1P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW3P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW3W3_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW4P1_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW4W4_c.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsall.c
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsall.h
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.c
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.h
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.c
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.h
src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_template_c.pre [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_f77_double/nb_kernel_f77_double.c
src/gmxlib/nonbonded/nb_kernel_f77_double/nb_kernel_f77_double.h
src/gmxlib/nonbonded/nb_kernel_f77_single/nb_kernel_f77_single.c
src/gmxlib/nonbonded/nb_kernel_f77_single/nb_kernel_f77_single.h
src/gmxlib/nonbonded/nb_kernel_power6/nb_kernel_power6.c
src/gmxlib/nonbonded/nb_kernel_power6/nb_kernel_power6.h
src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel400_sse2_double.c
src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel410_sse2_double.c
src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel430_sse2_double.c
src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.c
src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.h
src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel400_sse2_single.c
src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel410_sse2_single.c
src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel430_sse2_single.c
src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.c
src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.h
src/gmxlib/nonbonded/nb_kerneltype.h [deleted file]
src/gmxlib/nonbonded/nonbonded.c
src/gmxlib/nonbonded/preprocessor/gmxpreprocess.py [new file with mode: 0755]
src/gmxlib/nrnb.c
src/gmxlib/string2.c
src/gmxlib/tpxio.c
src/gmxlib/txtdump.c
src/kernel/do_gct.c
src/kernel/gctio.c
src/kernel/md.c
src/kernel/openmm_wrapper.cpp
src/kernel/pme_loadbal.c
src/kernel/readir.c
src/kernel/runner.c
src/kernel/tpbcmp.c
src/mdlib/adress.c
src/mdlib/domdec.c
src/mdlib/force.c
src/mdlib/forcerec.c
src/mdlib/genborn.c
src/mdlib/minimize.c
src/mdlib/nbnxn_atomdata.c
src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
src/mdlib/ns.c
src/mdlib/qmmm.c
src/mdlib/sim_util.c
src/mdlib/tables.c
src/mdlib/tpi.c
src/mdlib/update.c
src/mdlib/wall.c
src/mdlib/wnblist.c
src/tools/addconf.c
src/tools/calcpot.c
src/tools/gmx_h2order.c

index c413e515e994c54309003f558a06a46b19176631..e3a9c41eec45d658bb9e98512ddd53cbd3143806 100644 (file)
@@ -409,6 +409,9 @@ check_function_exists(sigaction         HAVE_SIGACTION)
 check_function_exists(sysconf           HAVE_SYSCONF)
 check_function_exists(sched_setaffinity HAVE_SCHED_SETAFFINITY)
 check_function_exists(sched_getaffinity HAVE_SCHED_GETAFFINITY)
+check_function_exists(rsqrt             HAVE_RSQRT)
+check_function_exists(rsqrtf            HAVE_RSQRTF)
+check_function_exists(sqrtf             HAVE_SQRTF)
 
 include(CheckLibraryExists)
 check_library_exists(m sqrt "" HAVE_LIBM)
@@ -670,7 +673,7 @@ include(gmxTestInline)
 gmx_test_inline(INLINE_KEYWORD)
 
 include(gmxTestRestrict)
-gmx_test_inline(RESTRICT_KEYWORD)
+gmx_test_restrict(RESTRICT_KEYWORD)
 
 include(gmxTestPipes)
 gmx_test_pipes(HAVE_PIPES)
index f1108b6e10f3fdb7747b1fb278108f1a9d3296bd..e107ac46085b054be2bf28ae69860fe9bea98bb9 100644 (file)
@@ -11,20 +11,20 @@ MACRO(GMX_TEST_INLINE VARIABLE)
 
         MESSAGE(STATUS "Checking for inline keyword")
 
-       FOREACH(KEYWORD "inline" "__inline__" "__inline")
+       FOREACH(KEYWORD "__inline__" "__inline" "inline")
             IF(NOT TEST_${VARIABLE})
                 TRY_COMPILE(TEST_${VARIABLE} "${CMAKE_BINARY_DIR}"    
                             "${CMAKE_SOURCE_DIR}/cmake/TestInline.c"
                             COMPILE_DEFINITIONS "-DTESTINLINEDEF=${KEYWORD}" )
-                SET(CHK_INLINE_KEYWORD ${KEYWORD})
-            ENDIF(NOT TEST_${VARIABLE})
+                SET(LAST_INLINE_KEYWORD ${KEYWORD})
+           ENDIF(NOT TEST_${VARIABLE})
         ENDFOREACH(KEYWORD)
-             
+
         IF(TEST_${VARIABLE})
-            SET(${VARIABLE} ${KEYWORD})
-            MESSAGE(STATUS "Checking for inline keyword - ${CHK_INLINE_KEYWORD}")
+            SET(${VARIABLE} ${LAST_INLINE_KEYWORD} CACHE INTERNAL "Inline keyword" FORCE)
+            MESSAGE(STATUS "Checking for inline keyword - ${LAST_INLINE_KEYWORD}")
         ELSE(TEST_${VARIABLE})
-           SET(${VARIABLE} " ")
+           SET(${VARIABLE} " " CACHE INTERNAL "Inline keyword" FORCE)
             MESSAGE(STATUS "Checking for inline keyword - not found")
         ENDIF(TEST_${VARIABLE})
 
index d2bc2b5b3c12aad96785c857bc8ddf789a621930..f23819bb89b227baa82ee9deeaddd366f67fef39 100644 (file)
@@ -17,15 +17,15 @@ MACRO(GMX_TEST_RESTRICT VARIABLE)
                 TRY_COMPILE(TEST_${VARIABLE} "${CMAKE_BINARY_DIR}"    
                             "${CMAKE_SOURCE_DIR}/cmake/TestRestrict.c"
                             COMPILE_DEFINITIONS "-DTESTRESTRICTDEF=${KEYWORD}" )
-                SET(CHK_RESTRICT_KEYWORD ${KEYWORD})
+                SET(LAST_RESTRICT_KEYWORD ${KEYWORD})
             ENDIF(NOT TEST_${VARIABLE})
         ENDFOREACH(KEYWORD)
-             
+
         IF(TEST_${VARIABLE})
-            SET(${VARIABLE} ${KEYWORD})
-            MESSAGE(STATUS "Checking for restrict keyword - ${CHK_RESTRICT_KEYWORD}")
+            SET(${VARIABLE} ${LAST_RESTRICT_KEYWORD} CACHE INTERNAL "Restrict keyword" FORCE)
+            MESSAGE(STATUS "Checking for restrict keyword - ${LAST_RESTRICT_KEYWORD}")
         ELSE(TEST_${VARIABLE})
-           SET(${VARIABLE} " ")
+           SET(${VARIABLE} " " CACHE INTERNAL "Restrict keyword" FORCE)
             MESSAGE(STATUS "Checking for restrict keyword - not found")
         ENDIF(TEST_${VARIABLE})
 
index 758b748dbf6bd43a981f4c255408ceb1e701fd42..d9bb5294c25af069fa0294cbaf8b39497cb65a10 100644 (file)
@@ -138,13 +138,18 @@ gmx_bool can_use_allvsall(const t_inputrec *ir, const gmx_mtop_t *mtop,
 
 void init_interaction_const_tables(FILE *fp, 
                                    interaction_const_t *ic,
-                                   int verlet_kernel_type);
+                                   int cutoff_scheme,
+                                   int verlet_kernel_type,
+                                   real rtab);
 /* Initializes the tables in the interaction constant data structure.
+ * Setting verlet_kernel_type to -1 always initializes tables for
+ * use with group kernels.
  */
 
 void init_interaction_const(FILE *fp, 
                             interaction_const_t **interaction_const,
-                            const t_forcerec *fr);
+                            const t_forcerec *fr,
+                            real  rtab);
 /* Initializes the interaction constant data structure. Currently it 
  * uses forcerec as input. 
  */
@@ -242,9 +247,7 @@ void ns(FILE       *fplog,
               real       *dvdlambda,
               gmx_grppairener_t *grppener,
               gmx_bool       bFillGrid,
-              gmx_bool       bDoLongRange,
-              gmx_bool       bDoForces,
-              rvec       *f);
+           gmx_bool       bDoLongRangeNS);
 /* Call the neighborsearcher */
 
 extern void do_force_lowlevel(FILE         *fplog,  
@@ -259,7 +262,8 @@ extern void do_force_lowlevel(FILE         *fplog,
                              t_grpopts    *opts,
                              rvec         x[],
                              history_t    *hist,
-                             rvec         f[],    
+                             rvec         f_shortrange[],
+                  rvec         f_longrange[],
                              gmx_enerdata_t *enerd,
                              t_fcdata     *fcd,
                              gmx_mtop_t     *mtop,
index a654b3838824288238ba78fec0d1d272417d3a8a..7b2e56aa2641f22fa2273c874b06d8a25f8c83b4 100644 (file)
@@ -78,7 +78,6 @@ extern const char *elmceq_names[elmceqNR+1];
 extern const char *separate_dhdl_file_names[esepdhdlfileNR+1];
 extern const char *dhdl_derivatives_names[edhdlderivativesNR+1];
 extern const char *esol_names[esolNR+1];
-extern const char *enlist_names[enlistNR+1];
 extern const char *edispc_names[edispcNR+1];
 extern const char *ecm_names[ecmNR+1];
 extern const char *eann_names[eannNR+1];
@@ -98,6 +97,9 @@ extern const char *eMultentOpt_names[eMultentOptNR+1];
 extern const char *eAdresstype_names[eAdressNR+1];
 extern const char *eAdressICtype_names[eAdressICNR+1];
 extern const char *eAdressSITEtype_names[eAdressSITENR+1];
+extern const char *gmx_nblist_geometry_names[GMX_NBLIST_GEOMETRY_NR+1];
+extern const char *gmx_nbkernel_elec_names[GMX_NBKERNEL_ELEC_NR+1];
+extern const char *gmx_nbkernel_vdw_names[GMX_NBKERNEL_VDW_NR+1];
 
 #define        UNDEFINED               "UNDEFINED"
 #define ENUM_NAME(e,max,names) ((((e)<0)||((e)>=(max)))?UNDEFINED:(names)[e])
index a00bbd99eb6cd2bcc7931fe7501df60f65836c14..23aa6421f74402242cd48e6e5ad81080047c976c 100644 (file)
 #ifdef __cplusplus
 extern "C" {
 #endif
+#if 0
+} /* fixes auto-indentation problems */
+#endif
+
+
+
+void
+gmx_nonbonded_setup(FILE *         fplog,
+                    t_forcerec *   fr,
+                    gmx_bool       bGenericKernelOnly);
+
+
+
+
+
+void
+gmx_nonbonded_set_kernel_pointers(FILE *       fplog,
+                                  t_nblist *   nl);
+
 
-void gmx_setup_kernels(FILE *fplog,t_forcerec *fr,gmx_bool bGenericKernelOnly);
-void gmx_setup_adress_kernels(FILE *fplog,gmx_bool bGenericKernelOnly);
 
-#define GMX_DONB_LR             (1<<0)
-#define GMX_DONB_FORCES         (1<<1)
-#define GMX_DONB_FOREIGNLAMBDA  (1<<2)
+#define GMX_NONBONDED_DO_LR             (1<<0)
+#define GMX_NONBONDED_DO_FORCE          (1<<1)
+#define GMX_NONBONDED_DO_FOREIGNLAMBDA  (1<<2)
+#define GMX_NONBONDED_DO_POTENTIAL      (1<<3)
+#define GMX_NONBONDED_DO_SR             (1<<4)
 
 void
 do_nonbonded(t_commrec *cr,t_forcerec *fr,
-             rvec x[],rvec f[],t_mdatoms *md,t_blocka *excl,
-             real egnb[],real egcoul[],real egb[],rvec box_size,
+             rvec x[],rvec f_shortrange[],rvec f_longrange[],t_mdatoms *md,t_blocka *excl,
+             gmx_grppairener_t *grppener,rvec box_size,
              t_nrnb *nrnb,real *lambda,real dvdlambda[],
              int nls,int eNL,int flags);
 
-/* Calculate VdW/charge pair interactions (usually 1-4 interactions).
+/* Calculate VdW/charge listed pair interactions (usually 1-4 interactions).
  * global_atom_index is only passed for printing error messages.
  */
 real
-do_listed_vdw_q(int ftype,int nbonds,
-               const t_iatom iatoms[],const t_iparams iparams[],
-               const rvec x[],rvec f[],rvec fshift[],
-               const t_pbc *pbc,const t_graph *g,
-               real *lambda,real *dvdlambda,
-               const t_mdatoms *md,
-               const t_forcerec *fr,gmx_grppairener_t *grppener,
-               int *global_atom_index);
+do_nonbonded_listed(int ftype,int nbonds,const t_iatom iatoms[],const t_iparams iparams[],
+                    const rvec x[],rvec f[],rvec fshift[],const t_pbc *pbc,const t_graph *g,
+                    real *lambda, real *dvdl,const t_mdatoms *md,const t_forcerec *fr,
+                    gmx_grppairener_t *grppener,int *global_atom_index);
 
 #ifdef __cplusplus
 }
index bfe9b4c9af96bb7ed3d158b53874cf054161d596..d59af03482f66d3883adfa2a8f2f814ebbe54021 100644 (file)
@@ -89,8 +89,7 @@ int search_neighbours(FILE *log,t_forcerec *fr,
                             real *lambda,real *dvdlambda,
                             gmx_grppairener_t *grppener,
                             gmx_bool bFillGrid,
-                            gmx_bool bDoLongRange,
-                            gmx_bool bDoForces,rvec *f);
+                            gmx_bool bDoLongRangeNS);
  
 
 /* Debugging routines from wnblist.c */
index bafb60b47191cf84beb19af01cb08ec7ff97ea2a..645316b319c835238e02ebf7ad56e985c27f8fab 100644 (file)
@@ -76,6 +76,9 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
+#if 0
+}
+#endif
 
 #define CONTINUE    '\\'
 #define COMMENTSIGN ';'
@@ -111,6 +114,21 @@ int gmx_strncasecmp(const char *str1, const char *str2, int n);
 
 char *gmx_strdup(const char *src);
 char *gmx_strndup(const char *src, int n);
+
+/* Magic hash initialization number from Dan J. Bernstein. */
+extern const unsigned int
+gmx_string_hash_init;
+
+/* Return a hash of the string according to Dan J. Bernsteins algorithm.
+ * This routine only uses characters for which isalnum(c) is true,
+ * and all characters are converted to upper case.
+ * On the first invocation for a new string, use the constant
+ * gmx_string_hash_init for the second argument. If you want to create a hash
+ * corresponding to several concatenated strings, provide the returned hash
+ * value as hash_init for the second string, etc.
+ */
+unsigned int
+gmx_string_hash_func(const char *s, unsigned int hash_init);
     
 /** Pattern matcing with wildcards. */
 int gmx_wcmatch(const char *pattern, const char *src);
@@ -138,7 +156,7 @@ char **split(char sep,char *str);
 /* Implementation of the well-known Perl function split */
 
 gmx_large_int_t str_to_large_int_t(const char *str, char **endptr);
-
+    
 #ifdef GMX_NATIVE_WINDOWS
 #define snprintf _snprintf
 #endif
index ec942425a03f15885ddc991bf73dc07bf4de42a0..262da15e2c887cf4e9cf1c4b12b47cd2e6a255eb 100644 (file)
 #ifdef __cplusplus
 extern "C" {
 #endif
+#if 0
+}
+#endif
 
-
-void table_spline3_fill_ewald_lr(real *tabf,real *tabv,
-                                 int ntab,int tableformat,
-                                 real dr,real beta);
-/* Fill table tabf of size ntab with spacing dr with the ewald long-range
- * (mesh) force and with tableformatF and tabv!=NULL, fill tabv energy.
- * With tableformatFDV0 the size of the tabf array should be ntab*4, tabv=NULL.
+void table_spline3_fill_ewald_lr(real *table_F,
+                                 real *table_V,
+                                 real *table_FDV0,
+                                 int   ntab,
+                                 real  dx,
+                                 real  beta);
+/* Fill tables of ntab points with spacing dr with the ewald long-range
+ * (mesh) force.
+ * There are three separate tables with format FDV0, F, and V.
  * This function interpolates the Ewald mesh potential contribution
  * with coefficient beta using a quadratic spline.
  * The force can then be interpolated linearly.
index 9bc5948fbcfece19e1e466698e5c268ec872ecd5..7b0fa51d5ce80b6368d4012ee131daaf9eca6daa 100644 (file)
@@ -39,6 +39,9 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
+#if 0
+} /* fixes auto-indentation problems */
+#endif
 
 /* note: these enums should correspond to the names in gmxlib/names.c */
 
@@ -76,15 +79,16 @@ enum {
   erscNO, erscALL, erscCOM, erscNR
 };
 
-enum { 
+enum {
   ecutsGROUP, ecutsVERLET, ecutsNR
 };
 
 /* Coulomb / VdW interaction modifiers.
  * grompp replaces eintmodPOTSHIFT_VERLET by eintmodPOTSHIFT or eintmodNONE.
+ * Exactcutoff is only used by Reaction-field-zero, and is not user-selectable.
  */
-enum {
-    eintmodPOTSHIFT_VERLET, eintmodPOTSHIFT, eintmodNONE, eintmodNR
+enum eintmod {
+    eintmodPOTSHIFT_VERLET, eintmodPOTSHIFT, eintmodNONE, eintmodPOTSWITCH, eintmodEXACTCUTOFF, eintmodNR
 };
 
 /*
@@ -345,6 +349,86 @@ enum {
   eAdressSITEcom,eAdressSITEcog, eAdressSITEatom, eAdressSITEatomatom, eAdressSITENR
 };
 
+
+/* The interactions contained in a (possibly merged) table
+ * for computing electrostatic, VDW repulsion and/or VDW dispersion 
+ * contributions.
+ */
+enum gmx_table_interaction
+{
+    GMX_TABLE_INTERACTION_ELEC,
+    GMX_TABLE_INTERACTION_VDWREP_VDWDISP,
+    GMX_TABLE_INTERACTION_VDWEXPREP_VDWDISP,
+    GMX_TABLE_INTERACTION_VDWDISP,
+    GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP,
+    GMX_TABLE_INTERACTION_ELEC_VDWEXPREP_VDWDISP,
+    GMX_TABLE_INTERACTION_ELEC_VDWDISP,
+    GMX_TABLE_INTERACTION_NR
+};
+
+/* Different formats for table data. Cubic spline tables are typically stored
+ * with the four Y,F,G,H intermediate values (check tables.c for format), which
+ * makes it easy to load with a single 4-way SIMD instruction too.
+ * Linear tables only need one value per table point, or two if both V and F
+ * are calculated. However, with SIMD instructions this makes the loads unaligned,
+ * and in that case we store the data as F, D=F(i+1)-F(i), V, and then a blank value,
+ * which again makes it possible to load as a single instruction.
+ */
+enum gmx_table_format
+{
+    GMX_TABLE_FORMAT_CUBICSPLINE_YFGH,
+    GMX_TABLE_FORMAT_LINEAR_VF,
+    GMX_TABLE_FORMAT_LINEAR_V,
+    GMX_TABLE_FORMAT_LINEAR_F,
+    GMX_TABLE_FORMAT_LINEAR_FDV0,
+    GMX_TABLE_FORMAT_NR
+};
+
+/* Neighborlist geometry type.
+ * Kernels will compute interactions between two particles, 
+ * 3-center water, 4-center water or coarse-grained beads.
+ */
+enum gmx_nblist_kernel_geometry
+{
+    GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE,
+    GMX_NBLIST_GEOMETRY_WATER3_PARTICLE,
+    GMX_NBLIST_GEOMETRY_WATER3_WATER3,
+    GMX_NBLIST_GEOMETRY_WATER4_PARTICLE,
+    GMX_NBLIST_GEOMETRY_WATER4_WATER4,
+    GMX_NBLIST_GEOMETRY_CG_CG,
+    GMX_NBLIST_GEOMETRY_NR
+};
+
+/* Types of electrostatics calculations available inside nonbonded kernels.
+ * Note that these do NOT necessarily correspond to the user selections in the MDP file;
+ * many interactions for instance map to tabulated kernels.
+ */
+enum gmx_nbkernel_elec
+{
+    GMX_NBKERNEL_ELEC_NONE,
+    GMX_NBKERNEL_ELEC_COULOMB,
+    GMX_NBKERNEL_ELEC_REACTIONFIELD,
+    GMX_NBKERNEL_ELEC_CUBICSPLINETABLE,
+    GMX_NBKERNEL_ELEC_GENERALIZEDBORN,
+    GMX_NBKERNEL_ELEC_EWALD,
+    GMX_NBKERNEL_ELEC_NR
+};
+
+/* Types of vdw calculations available inside nonbonded kernels.
+ * Note that these do NOT necessarily correspond to the user selections in the MDP file;
+ * many interactions for instance map to tabulated kernels.
+ */
+enum gmx_nbkernel_vdw
+{
+    GMX_NBKERNEL_VDW_NONE,
+    GMX_NBKERNEL_VDW_LENNARDJONES,
+    GMX_NBKERNEL_VDW_BUCKINGHAM,
+    GMX_NBKERNEL_VDW_CUBICSPLINETABLE,
+    GMX_NBKERNEL_VDW_NR
+};
+
+
+
 #ifdef __cplusplus
 }
 #endif
index 759b31587a1fe604abfa12d338492512507e34a4..4b22e5b6eca520b7da4e864da6b08f05348dc8b4 100644 (file)
@@ -50,8 +50,8 @@ extern "C" {
 #define GMX_FORCE_DYNAMICBOX   (1<<1)
 /* Do neighbor searching */
 #define GMX_FORCE_NS           (1<<2)
-/* Calculate long-range energies/forces */
-#define GMX_FORCE_DOLR         (1<<3)
+/* Update long-range neighborlists */
+#define GMX_FORCE_LRNS         (1<<3)
 /* Calculate bonded energies/forces */
 #define GMX_FORCE_BONDED       (1<<4)
 /* Store long-range forces in a separate array */
@@ -66,6 +66,9 @@ extern "C" {
 #define GMX_FORCE_ENERGY       (1<<9)
 /* Calculate dHdl */
 #define GMX_FORCE_DHDL         (1<<10)
+/* Calculate long-range energies/forces */
+#define GMX_FORCE_DO_LR        (1<<11)
+
 /* Normally one want all energy terms and forces */
 #define GMX_FORCE_ALLFORCES    (GMX_FORCE_BONDED | GMX_FORCE_NONBONDED | GMX_FORCE_FORCES)
 
index c94e382a089478cd8e3e249f5b654322d03828f0..a5b8c3e15e831fdd88e9494ee7c66fb3cdc7050e 100644 (file)
 #ifdef __cplusplus
 extern "C" {
 #endif
+#if 0
+} /* fixes auto-indentation problems */
+#endif
 
 /* Abstract type for PME that is defined only in the routine that use them. */
 typedef struct gmx_pme *gmx_pme_t;
 
-typedef struct {
-  real r;         /* range of the table */
-  int  n;         /* n+1 is the number of points */
-  real scale;     /* distance between two points */
-  real scale_exp; /* distance for exponential Buckingham table */
-  real *tab;      /* the actual tables, per point there are  4 numbers for
-                  * Coulomb, dispersion and repulsion (in total 12 numbers)
-                  */
+
+
+/* Structure describing the data in a single table */
+typedef struct
+{
+    enum gmx_table_interaction  interaction; /* Types of interactions stored in this table */
+    enum gmx_table_format       format;      /* Interpolation type and data format */
+
+    real                        r;         /* range of the table */
+    int                         n;         /* n+1 is the number of table points */
+    real                        scale;     /* distance (nm) between two table points */
+    real                        scale_exp; /* distance for exponential part of VdW table, not always used */
+    real *                      data;      /* the actual table data */
+
+    /* Some information about the table layout. This can also be derived from the interpolation
+     * type and the table interactions, but it is convenient to have here for sanity checks, and it makes it
+     * much easier to access the tables in the nonbonded kernels when we can set the data from variables.
+     * It is always true that stride = formatsize*ninteractions
+     */
+    int                         formatsize;    /* Number of fp variables for each table point (1 for F, 2 for VF, 4 for YFGH, etc.) */
+    int                         ninteractions; /* Number of interactions in table, 1 for coul-only, 3 for coul+rep+disp. */
+    int                         stride;        /* Distance to next table point (number of fp variables per table point in total) */
 } t_forcetable;
 
-typedef struct {
-  t_forcetable tab;
-  /* We duplicate tables for cache optimization purposes */
-  real *coultab;      /* Coul only */
-  real *vdwtab;       /* Vdw only   */
-  /* The actual neighbor lists, short and long range, see enum above
-   * for definition of neighborlist indices.
-   */
-  t_nblist nlist_sr[eNL_NR];
-  t_nblist nlist_lr[eNL_NR];
+typedef struct
+{
+    t_forcetable   table_elec;
+    t_forcetable   table_vdw;
+    t_forcetable   table_elec_vdw;
+
+    /* The actual neighbor lists, short and long range, see enum above
+     * for definition of neighborlist indices.
+     */
+    t_nblist nlist_sr[eNL_NR];
+    t_nblist nlist_lr[eNL_NR];
 } t_nblists;
 
 /* macros for the cginfo data in forcerec */
@@ -173,6 +191,20 @@ typedef struct {
   gmx_hw_info_t *hwinfo;
   gmx_bool      use_cpu_acceleration;
 
+  /* Interaction for calculated in kernels. In many cases this is similar to
+   * the electrostatics settings in the inputrecord, but the difference is that
+   * these variables always specify the actual interaction in the kernel - if
+   * we are tabulating reaction-field the inputrec will say reaction-field, but
+   * the kernel interaction will say cubic-spline-table. To be safe we also
+   * have a kernel-specific setting for the modifiers - if the interaction is
+   * tabulated we already included the inputrec modification there, so the kernel
+   * modification setting will say 'none' in that case.
+   */
+  int nbkernel_elec_interaction;
+  int nbkernel_vdw_interaction;
+  int nbkernel_elec_modifier;
+  int nbkernel_vdw_modifier;
+
   /* Use special N*N kernels? */
   gmx_bool bAllvsAll;
   /* Private work data */
@@ -183,7 +215,7 @@ typedef struct {
    * Infinite cut-off's will be GMX_CUTOFF_INF (unlike in t_inputrec: 0).
    */
   real rlist,rlistlong;
-  
+
   /* Dielectric constant resp. multiplication factor for charges */
   real zsquare,temp;
   real epsilon_r,epsilon_rf,epsfac;  
@@ -198,6 +230,7 @@ typedef struct {
 
   /* Dispersion correction stuff */
   int  eDispCorr;
+
   /* The shift of the shift or user potentials */
   real enershiftsix;
   real enershifttwelve;
@@ -224,12 +257,12 @@ typedef struct {
   t_forcetable tab14; /* for 1-4 interactions only */
 
   /* PPPM & Shifting stuff */
-  gmx_bool coul_pot_shift;
+  int coulomb_modifier;
   real rcoulomb_switch,rcoulomb;
   real *phi;
 
   /* VdW stuff */
-  gmx_bool vdw_pot_shift;
+  int vdw_modifier;
   double reppow;
   real rvdw_switch,rvdw;
   real bham_b_max;
@@ -266,7 +299,7 @@ typedef struct {
   int  *gid2nblists;
   t_nblists *nblists;
 
-  int      cutoff_scheme; /* old- or Verlet-style cutoff */
+  int cutoff_scheme; /* group- or Verlet-style cutoff */
   gmx_bool bNonbonded;    /* true if nonbonded calculations are *not* turned off */
   nonbonded_verlet_t *nbv;
 
@@ -427,6 +460,10 @@ typedef struct {
   int  *excl_load;
 } t_forcerec;
 
+/* Important: Starting with Gromacs-4.6, the values of c6 and c12 in the nbfp array have
+ * been scaled by 6.0 or 12.0 to save flops in the kernels. We have corrected this everywhere
+ * in the code, but beware if you are using these macros externally.
+ */
 #define C6(nbfp,ntp,ai,aj)     (nbfp)[2*((ntp)*(ai)+(aj))]
 #define C12(nbfp,ntp,ai,aj)    (nbfp)[2*((ntp)*(ai)+(aj))+1]
 #define BHAMC(nbfp,ntp,ai,aj)  (nbfp)[3*((ntp)*(ai)+(aj))]
index 7d44e4ccb2a881627feb6a51ec03d20fb22ba382..635c4e7141ed1596d46857c02ccafb244adceb1f 100644 (file)
@@ -318,7 +318,7 @@ typedef struct
 typedef struct {
   int  n;         /* n+1 is the number of points */
   real scale;     /* distance between two points */
-  real *tab;      /* the actual tables, per point there are  4 numbers */
+  real *data;     /* the actual table data, per point there are 4 numbers */
 } bondedtable_t;
 
 #ifdef __cplusplus
index b62fc949759cc3b1a22a9ae5e5b471e5c3f5dc07..a3d9c6fd7fd7c327eb62a2810ec4ee3124c6bddb 100644 (file)
@@ -270,7 +270,7 @@ typedef struct {
   int  simulation_part; /* Used in checkpointing to separate chunks */
   gmx_large_int_t init_step;   /* start at a stepcount >0 (used w. tpbconv)    */
   int  nstcalcenergy;  /* frequency of energy calc. and T/P coupl. upd.        */
-  int  cutoff_scheme;   /* cut-off scheme: group or verlet              */
+  int  cutoff_scheme;   /* group or verlet cutoffs     */
   int  ns_type;                /* which ns method should we use?               */
   int  nstlist;                /* number of steps before pairlist is generated */
   int  ndelta;         /* number of cells per rlong                    */
@@ -313,8 +313,9 @@ typedef struct {
   rvec posres_comB;     /* The B-state COM of the posres atoms          */
   int  andersen_seed;   /* Random seed for Andersen thermostat (obsolete) */
   real verletbuf_drift; /* Max. drift (kJ/mol/ps/atom) for list buffer  */
-  real rlist;          /* short range pairlist cut-off (nm)            */
-  real rlistlong;      /* long range pairlist cut-off (nm)             */
+  real rlist;              /* short range pairlist cut-off (nm)                */
+  real rlistlong;          /* long range pairlist cut-off (nm)         */
+  int  nstcalclr;       /* Frequency of evaluating direct space long-range interactions */
   real rtpi;            /* Radius for test particle insertion           */
   int  coulombtype;    /* Type of electrostatics treatment             */
   int  coulomb_modifier; /* Modify the Coulomb interaction              */
index 842d25b6224f02b60764580817cb2fbbca02e7b2..52f343bbca5ed92f8dc2c99d4871c5e670d5f1e6 100644 (file)
@@ -40,8 +40,6 @@
 extern "C" {
 #endif
 
-enum { tableformatNONE, tableformatF, tableformatFDV0 };
-
 typedef struct {
     /* VdW */
     real rvdw;
@@ -55,7 +53,8 @@ typedef struct {
 
     /* Cut-off */
     real rlist;
-
+    real rlistlong;
+    
     /* PME/Ewald */
     real ewaldcoeff;
     real sh_ewald;   /* For shifting the Ewald potential */
@@ -72,7 +71,6 @@ typedef struct {
     /* Force/energy interpolation tables, linear in force, quadratic in V */
     real tabq_scale;
     int  tabq_size;
-    int  tabq_format;
     /* Coulomb force table, size of array is tabq_size (when used) */
     real *tabq_coul_F;
     /* Coulomb energy table, size of array is tabq_size (when used) */
index ae5aea1853383ac0d009469cb3e8842694d8ea22..1e189cd7ef9a3eb5736426ad192eb507694d3b12 100644 (file)
 extern "C" {
 #endif
 
-/* Neighborlist type */
-enum {
-  enlistATOM_ATOM,
-  enlistSPC_ATOM,   enlistSPC_SPC,
-  enlistTIP4P_ATOM, enlistTIP4P_TIP4P,
-  enlistCG_CG,
-  enlistNR
-};
 
 typedef unsigned long t_excl;
 
@@ -62,26 +54,31 @@ typedef unsigned long t_excl;
 
 typedef struct 
 {
-  int             enlist;      /* The type of nblist, enum, see above    */
-  int             il_code;      /* Innerloop index from nrnb.h, used     */
-                                /* for flop accounting.                  */
-  int             icoul;        /* Coulomb loop type index for kernels   */
-  int             ivdw;         /* VdW loop type index for kernels       */
-  int             free_energy;  /* Free energy setting for this list     */
+    int             igeometry;    /* The type of list (atom, water, etc.)  */
+    int             ielec;        /* Coulomb loop type index for kernels   */
+    int             ielecmod;     /* Coulomb modifier (e.g. switch/shift)  */
+    int             ivdw;         /* VdW loop type index for kernels       */
+    int             ivdwmod;      /* VdW modifier (e.g. switch/shift)      */
+    int             free_energy;  /* Free energy setting for this list     */
 
-  int             nri,maxnri;   /* Current/max number of i particles    */
-  int             nrj,maxnrj;   /* Current/max number of j particles    */
-  int             maxlen;       /* maxnr of j atoms for a single i atom  */
-  int *           iinr;                /* The i-elements                        */
-  int *           iinr_end;     /* The end atom, only with enlistCG      */
-  int *           gid;          /* Index in energy arrays                */
-  int *           shift;        /* Shift vector index                    */
-  int *           jindex;       /* Index in jjnr                         */
-  int *           jjnr;                /* The j-atom list                       */
-  int *           jjnr_end;     /* The end atom, only with enltypeCG     */
-  t_excl *        excl;         /* Exclusions, only with enltypeCG       */
-  int             count;        /* counter to multithread the innerloops */
-  void *          mtx;          /* mutex to lock the counter             */
+    int             nri,maxnri;   /* Current/max number of i particles    */
+    int             nrj,maxnrj;   /* Current/max number of j particles    */
+    int             maxlen;       /* maxnr of j atoms for a single i atom  */
+    int *           iinr;         /* The i-elements                        */
+    int *           iinr_end;     /* The end atom, only with enlistCG      */
+    int *           gid;          /* Index in energy arrays                */
+    int *           shift;        /* Shift vector index                    */
+    int *           jindex;       /* Index in jjnr                         */
+    int *           jjnr;         /* The j-atom list                       */
+    int *           jjnr_end;     /* The end atom, only with enltypeCG     */
+    t_excl *        excl;         /* Exclusions, only with enltypeCG       */
+
+    /* We use separate pointers for kernels that compute both potential
+     * and force (vf suffix), only potential (v) or only force (f)
+     */
+    void *          kernelptr_vf;
+    void *          kernelptr_v;
+    void *          kernelptr_f;
 } t_nblist;
 
 
index aca5408c27f661478a4341f52e67d5be0b034ce1..0be7f1cd71144412a4d7b8772ef31b6e4247a881 100644 (file)
 #ifdef __cplusplus
 extern "C" {
 #endif
+#if 0
+} /* fixes auto-indentation problems */
+#endif
 
-/* The nonbonded kernels are documented in gmxlib/nonbonded_kernels, 
- * but here's a lazy version of the numbering. The first position
- * is the Coulomb interaction (0 for none), second is Van der Waals
- * (again, 0 means no interaction), and the third is the water optimization
- * (0 meaning no water optimization = standard atom-atom loop)
- *
- *                                     value
- * pos                 1                   2           3              4
- * 1st Coul        Normal,1/r       Reaction-field  Table            Generalized born
- * 2nd Vdw         Lennard-Jones    Buckingham      Table             n/a
- * 3rd Water. opt  SPC-other atom   SPC-SPC         TIP4p-other at.  TIP4p-TIP4p 
- */
 
 #define eNR_NBKERNEL_NONE -1
 
 enum 
 {
-    eNR_NBKERNEL010, eNR_NBKERNEL020, eNR_NBKERNEL030,
-    eNR_NBKERNEL100, eNR_NBKERNEL101, eNR_NBKERNEL102, eNR_NBKERNEL103, eNR_NBKERNEL104,
-    eNR_NBKERNEL110, eNR_NBKERNEL111, eNR_NBKERNEL112, eNR_NBKERNEL113, eNR_NBKERNEL114,
-    eNR_NBKERNEL120, eNR_NBKERNEL121, eNR_NBKERNEL122, eNR_NBKERNEL123, eNR_NBKERNEL124,
-    eNR_NBKERNEL130, eNR_NBKERNEL131, eNR_NBKERNEL132, eNR_NBKERNEL133, eNR_NBKERNEL134,
-    eNR_NBKERNEL200, eNR_NBKERNEL201, eNR_NBKERNEL202, eNR_NBKERNEL203, eNR_NBKERNEL204,
-    eNR_NBKERNEL210, eNR_NBKERNEL211, eNR_NBKERNEL212, eNR_NBKERNEL213, eNR_NBKERNEL214,
-    eNR_NBKERNEL220, eNR_NBKERNEL221, eNR_NBKERNEL222, eNR_NBKERNEL223, eNR_NBKERNEL224,
-    eNR_NBKERNEL230, eNR_NBKERNEL231, eNR_NBKERNEL232, eNR_NBKERNEL233, eNR_NBKERNEL234,
-    eNR_NBKERNEL300, eNR_NBKERNEL301, eNR_NBKERNEL302, eNR_NBKERNEL303, eNR_NBKERNEL304,
-    eNR_NBKERNEL310, eNR_NBKERNEL311, eNR_NBKERNEL312, eNR_NBKERNEL313, eNR_NBKERNEL314,
-    eNR_NBKERNEL320, eNR_NBKERNEL321, eNR_NBKERNEL322, eNR_NBKERNEL323, eNR_NBKERNEL324,
-    eNR_NBKERNEL330, eNR_NBKERNEL331, eNR_NBKERNEL332, eNR_NBKERNEL333, eNR_NBKERNEL334,
-    eNR_NBKERNEL400, eNR_NBKERNEL410, eNR_NBKERNEL430,
-    eNR_NBKERNEL010NF, eNR_NBKERNEL020NF, eNR_NBKERNEL030NF,
-    eNR_NBKERNEL100NF, eNR_NBKERNEL101NF, eNR_NBKERNEL102NF, eNR_NBKERNEL103NF, eNR_NBKERNEL104NF,
-    eNR_NBKERNEL110NF, eNR_NBKERNEL111NF, eNR_NBKERNEL112NF, eNR_NBKERNEL113NF, eNR_NBKERNEL114NF,
-    eNR_NBKERNEL120NF, eNR_NBKERNEL121NF, eNR_NBKERNEL122NF, eNR_NBKERNEL123NF, eNR_NBKERNEL124NF,
-    eNR_NBKERNEL130NF, eNR_NBKERNEL131NF, eNR_NBKERNEL132NF, eNR_NBKERNEL133NF, eNR_NBKERNEL134NF,
-    eNR_NBKERNEL200NF, eNR_NBKERNEL201NF, eNR_NBKERNEL202NF, eNR_NBKERNEL203NF, eNR_NBKERNEL204NF,
-    eNR_NBKERNEL210NF, eNR_NBKERNEL211NF, eNR_NBKERNEL212NF, eNR_NBKERNEL213NF, eNR_NBKERNEL214NF,
-    eNR_NBKERNEL220NF, eNR_NBKERNEL221NF, eNR_NBKERNEL222NF, eNR_NBKERNEL223NF, eNR_NBKERNEL224NF,
-    eNR_NBKERNEL230NF, eNR_NBKERNEL231NF, eNR_NBKERNEL232NF, eNR_NBKERNEL233NF, eNR_NBKERNEL234NF,
-    eNR_NBKERNEL300NF, eNR_NBKERNEL301NF, eNR_NBKERNEL302NF, eNR_NBKERNEL303NF, eNR_NBKERNEL304NF,
-    eNR_NBKERNEL310NF, eNR_NBKERNEL311NF, eNR_NBKERNEL312NF, eNR_NBKERNEL313NF, eNR_NBKERNEL314NF,
-    eNR_NBKERNEL320NF, eNR_NBKERNEL321NF, eNR_NBKERNEL322NF, eNR_NBKERNEL323NF, eNR_NBKERNEL324NF,
-    eNR_NBKERNEL330NF, eNR_NBKERNEL331NF, eNR_NBKERNEL332NF, eNR_NBKERNEL333NF, eNR_NBKERNEL334NF,
-    eNR_NBKERNEL400NF, eNR_NBKERNEL410NF, eNR_NBKERNEL430NF, 
-    eNR_NBKERNEL_NR,
-    eNR_NBKERNEL_FREE_ENERGY = eNR_NBKERNEL_NR,
+    eNR_NBKERNEL_VDW_VF,
+    eNR_NBKERNEL_VDW_F,
+    eNR_NBKERNEL_ELEC_VF,
+    eNR_NBKERNEL_ELEC_F,
+    eNR_NBKERNEL_ELEC_W3_VF,
+    eNR_NBKERNEL_ELEC_W3_F,
+    eNR_NBKERNEL_ELEC_W3W3_VF,
+    eNR_NBKERNEL_ELEC_W3W3_F,
+    eNR_NBKERNEL_ELEC_W4_VF,
+    eNR_NBKERNEL_ELEC_W4_F,
+    eNR_NBKERNEL_ELEC_W4W4_VF,
+    eNR_NBKERNEL_ELEC_W4W4_F,
+    eNR_NBKERNEL_ELEC_VDW_VF,
+    eNR_NBKERNEL_ELEC_VDW_F,
+    eNR_NBKERNEL_ELEC_VDW_W3_VF,
+    eNR_NBKERNEL_ELEC_VDW_W3_F,
+    eNR_NBKERNEL_ELEC_VDW_W3W3_VF,
+    eNR_NBKERNEL_ELEC_VDW_W3W3_F,
+    eNR_NBKERNEL_ELEC_VDW_W4_VF,
+    eNR_NBKERNEL_ELEC_VDW_W4_F,
+    eNR_NBKERNEL_ELEC_VDW_W4W4_VF,
+    eNR_NBKERNEL_ELEC_VDW_W4W4_F,
+
+    eNR_NBKERNEL_NR,  /* Total number of interaction-specific kernel entries */
+
+    eNR_NBKERNEL_GENERIC = eNR_NBKERNEL_NR, /* Reuse number; KERNEL_NR is not an entry itself */
+    eNR_NBKERNEL_FREE_ENERGY,               /* Add other generic kernels _before_ the free energy one */
+
     eNR_NBKERNEL_ALLVSALL,
     eNR_NBKERNEL_ALLVSALLGB,
-    eNR_NBKERNEL_OUTER,
+
     eNR_NBNXN_DIST2,
     eNR_NBNXN_LJ_RF,  eNR_NBNXN_LJ_RF_E,
     eNR_NBNXN_LJ_TAB, eNR_NBNXN_LJ_TAB_E,
@@ -122,14 +111,16 @@ enum
     eNR_VSITE2,               eNR_VSITE3,               eNR_VSITE3FD,
     eNR_VSITE3FAD,            eNR_VSITE3OUT,            eNR_VSITE4FD,
     eNR_VSITE4FDN,            eNR_VSITEN,               eNR_GB,
-       eNR_CMAP,
+    eNR_CMAP,
     eNRNB
 };
 
 
-typedef struct {
-  double n[eNRNB];
-} t_nrnb;
+typedef struct
+{
+    double n[eNRNB];
+}
+t_nrnb;
 
 
 typedef struct gmx_wallcycle *gmx_wallcycle_t;
index c9be7a06c1e46018cfad1208551de544a4e0ab33..72868210f9b5be2f5376edbe7a02cb9081faaad8 100644 (file)
@@ -217,6 +217,12 @@ typedef int gmx_large_int_t;
 #endif
     
     
+#ifndef gmx_inline
+/* config.h tests for inline definitions and should work on a much wider range
+ * of compilers, but does not work with installed headers. These compiler checks
+ * still enable a real inline keyword for the most common compilers.
+ */
+
 /* Try to define suitable inline keyword for gmx_inline.
  * Set it to empty if we cannot find one (and dont complain to the user)
  */
@@ -245,11 +251,18 @@ typedef int gmx_large_int_t;
 #endif
 
 #else
+/* C++ */
 #  define gmx_inline inline
 #endif
 
+#endif /* ifndef gmx_inline */
+
+
+/* Restrict keywords. Note that this has to be done for C++ too, unless
+ * it was set from the more general checks if we had config.h (gmx internal)
+ */
+#ifndef gmx_restrict
 
-/* Restrict keywords. Note that this has to be done for C++ too. */
 #ifdef __GNUC__
 /* GCC */
 #  define gmx_restrict   __restrict__
@@ -272,6 +285,7 @@ typedef int gmx_large_int_t;
 #  define gmx_restrict
 #endif
 
+#endif
 
 /* Standard sizes for char* string buffers */
 #define STRLEN 4096
index cbf570674450b739b05ab614f31ff0365c6306f6..fc74939b727d912ad2153f2db1cdf15085089e43 100644 (file)
@@ -210,15 +210,27 @@ static real gmx_invsqrt(real x)
 #define INVSQRT_DONE
 #endif /* powerpc_invsqrt */
 
-
 #ifndef INVSQRT_DONE
-#define gmx_invsqrt(x) (1.0f/sqrt(x))
+#    ifdef GMX_DOUBLE
+#        ifdef HAVE_RSQRT
+#            define gmx_invsqrt(x)     rsqrt(x)
+#        else
+#            define gmx_invsqrt(x)     (1.0/sqrt(x))
+#        endif
+#    else /* single */
+#        ifdef HAVE_RSQRTF
+#            define gmx_invsqrt(x)     rsqrtf(x)
+#        elif defined HAVE_RSQRT
+#            define gmx_invsqrt(x)     rsqrt(x)
+#        elif defined HAVE_SQRTF
+#            define gmx_invsqrt(x)     (1.0/sqrtf(x))
+#        else
+#            define gmx_invsqrt(x)     (1.0/sqrt(x))
+#        endif
+#    endif
 #endif
 
 
-
-
-
 static real sqr(real x)
 {
   return (x*x);
@@ -522,8 +534,10 @@ static gmx_inline real norm(const rvec a)
    * float/double case here instead to avoid gmx_sqrt() being accidentally used. */
 #ifdef GMX_DOUBLE
   return dnorm(a);
-#else
+#elif defined HAVE_SQRTF
   return sqrtf(iprod(a, a));
+#else
+  return sqrt(iprod(a, a));
 #endif
 }
 
index c51626034ef3ceeef70b547f8d7090dec6c4711d..637cd9561dcb30687f9c5d3b458f38ba818866df 100644 (file)
@@ -35,7 +35,7 @@ IF YOU'RE NOT SURE ABOUT WHAT YOU'RE DOING, DON'T DO IT!
 <li><a HREF="#xmdrun"><b>shell molecular dynamics</b></a>(emtol,niter,fcstep)
 <li><a HREF="#tpi"><b>test particle insertion</b></a>(rtpi)
 <li><A HREF="#out"><b>output control</b></A> (nstxout, nstvout, nstfout, nstlog, nstcalcenergy, nstenergy, nstxtcout, xtc-precision, xtc-grps, energygrps)
-<li><A HREF="#nl"><b>neighbor searching</b></A> (cutoff-scheme, nstlist, ns-type, pbc, periodic-molecules, verlet-buffer-drift, rlist, rlistlong)
+<li><A HREF="#nl"><b>neighbor searching</b></A> (cutoff-scheme, nstlist, nstcalclr, ns-type, pbc, periodic-molecules, verlet-buffer-drift, rlist, rlistlong)
 <li><A HREF="#el"><b>electrostatics</b></A> (coulombtype, coulomb-modifier, rcoulomb-switch, rcoulomb, epsilon-r, epsilon-rf)
 <li><A HREF="#vdw"><b>VdW</b></A> (vdwtype, vdw-modifier, rvdw-switch, rvdw, DispCorr)
 <li><A HREF="#table"><b>tables</b></A> (table-extension, energygrp-table)
@@ -442,6 +442,42 @@ while 99.99% of the particles are fine.
 </dd>
 </dl></dd>
 
+<dt><b>nstcalclr: (-1) [steps]</b></dt>
+<dd><dl compact>
+Controls the period between calculations of long-range forces. Useful
+only with <b>cutoff-scheme</b>=<b>group</b>.
+<dt><b>1</b></dt>
+<dd>Calculate the long-range forces every single step. This is useful
+to have separate neighbor lists with buffers for electrostatics and Van
+der Waals interactions, and in particular it makes it possible to have
+the Van der Waals cutoff longer than electrostatics (useful e.g. with
+PME). However, there is no point in having identical long-range
+cutoffs for both interaction forms and update them every step - then
+it will be slightly faster to put everything in the short-range
+list.</dd>
+<dt><b>&gt;1</b></dt>
+<dd>Calculate the long-range forces every <b>nstcalclr</b> steps and
+use a multiple-time-step integrator to combine forces. This can now be
+done more frequently than <b>nstlist</b> since the lists are stored,
+and it might be a good idea e.g. for Van der Waals interactions that
+vary slower than electrostatics.</dd>
+<dt><b>-1</b></dt>
+<dd>Calculate long-range forces on steps where neighbor searching is
+performed. While this is the default value, you might want to consider
+updating the long-range forces more frequently.</dd>
+Note that twin-range force evaluation might be enabled automatically
+by PP-PME load balancing. This is done in order to maintain the chosen
+Van der Waals interaction radius even if the load balancing is
+changing the electrostatics cutoff. If the <tt>.mdp</tt> file already
+specifies twin-range interactions (e.g. to evaluate Lennard-Jones
+interactions with a longer cutoff than the PME electrostatics every
+2-3 steps), the load balancing will have also a small effect on
+Lennard-Jones, since the short-range cutoff (inside which forces are
+evaluated every step) is changed.</dl>
+</dd>
+
+
+
 <dt><b>ns-type:</b></dt>
 <dd><dl compact>
 <dt><b>grid</b></dt>
index ea0d65d47fc75623dafc2a0021d5ea397b61e95d..bd115207deeb5140a3c149874d9afc5da8f2c579 100644 (file)
 /* Define to 1 if you have the sigaction() function. */
 #cmakedefine HAVE_SIGACTION
 
+/* Define to 1 if you have the rsqrt() function. */
+#cmakedefine HAVE_RSQRT
+
+/* Define to 1 if you have the rsqrtf() function. */
+#cmakedefine HAVE_RSQRTF
+
+/* Define to 1 if you have the sqrtf() function. */
+#cmakedefine HAVE_SQRTF
+
 /* Define to 1 if you have the <string.h> header file. */
 #cmakedefine HAVE_STRING_H
 
 #cmakedefine gid_t int
 
 /* Define to __inline__ or __inline if that is what the C compiler
-   calls it, or to nothing if inline is not supported under any name.  */
+   calls it, or to nothing if inline is not supported under any name.
+   Please do NOT remove the gmx_inline keyword from here. The classical
+   C++ inline keyword is merely a recommendation to the compiler, and
+   many compilers support stronger alternatives (e.g. __forceinline)
+   that we might want to use. */
+#define gmx_inline ${INLINE_KEYWORD}
 #ifndef __cplusplus
 #define inline ${INLINE_KEYWORD}
 #endif
 
 /* Define to __restrict__ or __restrict if that is what the C compiler
-   calls it, or to nothing if restrict is not supported under any name.  */
-#define restrict ${RESTRICT_KEYWORD}
+   calls it, unless we are on C99 when it is simply called restrict.
+   Since restrict is a reserved key word in C99 we are not allowed to
+   redefine the word itself, so call this gmx_restrict to avoid having
+   to identify the language standard level. If it is not supported, it
+   is still defined to an empty string here. */
+#define gmx_restrict ${RESTRICT_KEYWORD}
 
 #ifndef CPLUSPLUS
 #ifdef __cplusplus
index 2d49c9738c3d56eb371bb66a343959ae40761c6c..5058b899c42d1f5b273d7c9e81db5790eff7fbcb 100644 (file)
@@ -3023,7 +3023,7 @@ static real bonded_tab(const char *type,int table_nr,
   k = (1.0 - lambda)*kA + lambda*kB;
 
   tabscale = table->scale;
-  VFtab    = table->tab;
+  VFtab    = table->data;
   
   rt    = r*tabscale;
   n0    = rt;
@@ -3579,11 +3579,11 @@ static real calc_one_bond(FILE *fplog,int thread,
         }
         else
         {
-            v = do_listed_vdw_q(ftype,nbn,iatoms+nb0,
-                                idef->iparams,
-                                (const rvec*)x,f,fshift,
-                                pbc,g,lambda,dvdl,
-                                md,fr,grpp,global_atom_index);
+            v = do_nonbonded_listed(ftype,nbn,iatoms+nb0,idef->iparams,(const rvec*)x,f,fshift,
+                                    pbc,g,lambda,dvdl,md,fr,grpp,global_atom_index);
+
+            enerd->dvdl_nonlin[efptCOUL] += dvdl[efptCOUL];
+            enerd->dvdl_nonlin[efptVDW] += dvdl[efptVDW];
             
             if (bPrintSepPot)
             {
@@ -3666,11 +3666,11 @@ static real calc_one_bond_foreign(FILE *fplog,int ftype, const t_idef *idef,
                 }
                 else
                 {
-                    v = do_listed_vdw_q(ftype,nbonds,iatoms,
-                                        idef->iparams,
-                                        (const rvec*)x,f,fr->fshift,
-                                        pbc,g,lambda,dvdl,
-                                        md,fr,&enerd->grpp,global_atom_index);
+                    v = do_nonbonded_listed(ftype,nbonds,iatoms,
+                                            idef->iparams,
+                                            (const rvec*)x,f,fr->fshift,
+                                            pbc,g,lambda,dvdl,
+                                            md,fr,&enerd->grpp,global_atom_index);
                 }
                 if (ind != -1)
                 {
index 0d428e8f470564c1986806e720f83c76db0e15d9..356785630b2a155d3b60fdceaeebe5911d12acef 100644 (file)
@@ -95,7 +95,7 @@ const char *econstr_names[econtNR+1] = {
 };
 
 const char *eintmod_names[eintmodNR+1] = { 
-  "Potential-shift-Verlet","Potential-shift","None", NULL
+  "Potential-shift-Verlet","Potential-shift","None","Potential-switch","Exact-cutoff", NULL
 };
 
 const char *egrp_nm[egNR+1] = { 
@@ -176,10 +176,6 @@ const char *esol_names[esolNR+1] = {
   "No", "SPC", "TIP4p", NULL
 };
 
-const char *enlist_names[enlistNR+1] = {
-  "Atom-Atom", "SPC-Atom", "SPC-SPC", "TIP4p-Atom", "TIP4p-TIP4p", "CG-CG", NULL
-};
-
 const char *edispc_names[edispcNR+1] = {
   "No", "EnerPres", "Ener", "AllEnerPres", "AllEner", NULL
 };
@@ -257,3 +253,19 @@ const char *eAdressSITEtype_names[eAdressSITENR+1] = {
   "com","cog", "atom", "atomperatom", NULL
 };
 
+const char *gmx_nblist_geometry_names[GMX_NBLIST_GEOMETRY_NR+1] = {
+    "Particle-Particle", "Water3-Particle", "Water3-Water3", "Water4-Particle", "Water4-Water4", "CG-CG", NULL
+};
+
+const char *gmx_nbkernel_elec_names[GMX_NBKERNEL_ELEC_NR+1] =
+{
+    "None", "Coulomb", "Reaction-Field", "Cubic-Spline-Table", "Generalized-Born", "Ewald", NULL
+};
+
+const char *gmx_nbkernel_vdw_names[GMX_NBKERNEL_VDW_NR+1] =
+{
+    "None", "Lennard-Jones", "Buckingham", "Cubic-Spline-Table", NULL
+};
+
+
+
index 2c3a658eb39e12851d1fcaa20f3861293327049c..74bb4f80e36f22d4cbb315c359a824569bafb2ed 100644 (file)
@@ -1,24 +1,5 @@
 # Sources that should always be built
-file(GLOB NONBONDED_SOURCES *.c nb_kernel_c/*.c nb_kernel_adress_c/*.c)
-
-# Conditionally included ones
-if(GMX_FORTRAN)
-  if (GMX_DOUBLE)
-    file(GLOB NB_KERNEL_FORTRAN_SOURCES nb_kernel_f77_double/*.[cf])
-  else(GMX_DOUBLE)
-    file(GLOB NB_KERNEL_FORTRAN_SOURCES nb_kernel_f77_single/*.[cf])
-  endif(GMX_DOUBLE)
-endif(GMX_FORTRAN)
-
-if(GMX_POWER6)
-  file(GLOB NB_KERNEL_FORTRAN_SOURCES nb_kernel_power6/*.[cF])
-endif(GMX_POWER6)
-
-if(GMX_BLUEGENE)
-  file(GLOB NB_KERNEL_BLUEGENE_SOURCES nb_kernel_bluegene/*.c)
-endif(GMX_BLUEGENE)
-
-list(APPEND NONBONDED_SOURCES ${NB_KERNEL_FORTRAN_SOURCES} ${NB_KERNEL_BLUEGENE_SOURCES})
+file(GLOB NONBONDED_SOURCES *.c nb_kernel_c/*.c)
 
 # These sources will be used in the parent directory's CMakeLists.txt
 set(NONBONDED_SOURCES ${NONBONDED_SOURCES} PARENT_SCOPE)
index 941be19a201e653fb345fc8d63a4b1a1e647287e..9400c8a89927666e7d85b16cfa16d49adc2d4554 100644 (file)
 
 #include "vec.h"
 #include "typedefs.h"
+#include "nonbonded.h"
+#include "nb_kernel.h"
+#include "nrnb.h"
 
 void
-gmx_nb_free_energy_kernel(int                  icoul,
-                          int                  ivdw,
-                          int                  nri,
-                          int *                iinr,
-                          int *                jindex,
-                          int *                jjnr,
-                          int *                shift,
-                          real *               shiftvec,
-                          real *               fshift,
-                          int *                gid,
-                          real *               x,
-                          real *               f,
-                          real *               chargeA,
-                          real *               chargeB,
-                          real                 facel,
-                          real                 krf,
-                          real                 crf,
-                          real                 ewc,
-                          real *               Vc,
-                          int *                typeA,
-                          int *                typeB,
-                          int                  ntype,
-                          real *               nbfp,
-                          real *               Vv,
-                          real                 tabscale,
-                          real *               VFtab,
-                          real                 lambda_coul,
-                          real                 lambda_vdw,
-                          real *               dvdl,
-                          real                 alpha_coul,
-                          real                 alpha_vdw,
-                          int                  lam_power,
-                          real                 sc_r_power,
-                          real                 sigma6_def,
-                          real                 sigma6_min,
-                          gmx_bool             bDoForces,
-                          int *                outeriter,
-                          int *                inneriter)
+gmx_nb_free_energy_kernel(t_nblist *                nlist,
+                          rvec *                    xx,
+                          rvec *                    ff,
+                          t_forcerec *              fr,
+                          t_mdatoms *               mdatoms,
+                          nb_kernel_data_t *        kernel_data,
+                          t_nrnb *                  nrnb)
 {
 
 #define  STATE_A  0
@@ -108,7 +80,112 @@ gmx_nb_free_energy_kernel(int                  icoul,
     int           n0,n1C,n1V,nnn;
     real          Y,F,G,H,Fp,Geps,Heps2,epsC,eps2C,epsV,eps2V,VV,FF;
     double        isp=0.564189583547756;
+    int           icoul,ivdw;
+    int           nri;
+    int *         iinr;
+    int *         jindex;
+    int *         jjnr;
+    int *         shift;
+    int *         gid;
+    int *         typeA;
+    int *         typeB;
+    int           ntype;
+    real *        shiftvec;
     real          dvdl_part;
+    real *        fshift;
+    real          tabscale;
+    real *        VFtab;
+    real *        x;
+    real *        f;
+    real          facel,krf,crf;
+    real *        chargeA;
+    real *        chargeB;
+    real          sigma6_min,sigma6_def,lam_power,sc_power,sc_r_power;
+    real          alpha_coul,alpha_vdw,lambda_coul,lambda_vdw,ewc;
+    real *        nbfp;
+    real *        dvdl;
+    real *        Vv;
+    real *        Vc;
+    gmx_bool      bDoForces;
+    real          rcoulomb,rvdw,factor_coul,factor_vdw,sh_invrc6;
+    gmx_bool      bExactElecCutoff,bExactVdwCutoff;
+    real          rcutoff,rcutoff2,rswitch,d,d2,swV3,swV4,swV5,swF2,swF3,swF4,sw,dsw,rinvcorr;
+
+    x                   = xx[0];
+    f                   = ff[0];
+
+    fshift              = fr->fshift[0];
+    Vc                  = kernel_data->energygrp_elec;
+    Vv                  = kernel_data->energygrp_vdw;
+    tabscale            = kernel_data->table_elec_vdw->scale;
+    VFtab               = kernel_data->table_elec_vdw->data;
+
+    nri                 = nlist->nri;
+    iinr                = nlist->iinr;
+    jindex              = nlist->jindex;
+    jjnr                = nlist->jjnr;
+    icoul               = nlist->ielec;
+    ivdw                = nlist->ivdw;
+    shift               = nlist->shift;
+    gid                 = nlist->gid;
+
+    shiftvec            = fr->shift_vec[0];
+    chargeA             = mdatoms->chargeA;
+    chargeB             = mdatoms->chargeB;
+    facel               = fr->epsfac;
+    krf                 = fr->k_rf;
+    crf                 = fr->c_rf;
+    ewc                 = fr->ewaldcoeff;
+    Vc                  = kernel_data->energygrp_elec;
+    typeA               = mdatoms->typeA;
+    typeB               = mdatoms->typeB;
+    ntype               = fr->ntype;
+    nbfp                = fr->nbfp;
+    Vv                  = kernel_data->energygrp_vdw;
+    tabscale            = kernel_data->table_elec_vdw->scale;
+    VFtab               = kernel_data->table_elec_vdw->data;
+    lambda_coul         = kernel_data->lambda[efptCOUL];
+    lambda_vdw          = kernel_data->lambda[efptVDW];
+    dvdl                = kernel_data->dvdl;
+    alpha_coul          = fr->sc_alphacoul;
+    alpha_vdw           = fr->sc_alphavdw;
+    lam_power           = fr->sc_power;
+    sc_r_power          = fr->sc_r_power;
+    sigma6_def          = fr->sc_sigma6_def;
+    sigma6_min          = fr->sc_sigma6_min;
+    bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
+
+    rcoulomb            = fr->rcoulomb;
+    rvdw                = fr->rvdw;
+    sh_invrc6           = fr->ic->sh_invrc6;
+
+    if(fr->coulomb_modifier==eintmodPOTSWITCH || fr->vdw_modifier==eintmodPOTSWITCH)
+    {
+        rcutoff         = (fr->coulomb_modifier==eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
+        rcutoff2        = rcutoff*rcutoff;
+        rswitch         = (fr->coulomb_modifier==eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
+        d               = rcutoff-rswitch;
+        swV3            = -10.0/(d*d*d);
+        swV4            =  15.0/(d*d*d*d);
+        swV5            =  -6.0/(d*d*d*d*d);
+        swF2            = -30.0/(d*d*d);
+        swF3            =  60.0/(d*d*d*d);
+        swF4            = -30.0/(d*d*d*d*d);
+    }
+    else
+    {
+        /* Stupid compilers dont realize these variables will not be used */
+        rswitch         = 0.0;
+        swV3            = 0.0;
+        swV4            = 0.0;
+        swV5            = 0.0;
+        swF2            = 0.0;
+        swF3            = 0.0;
+        swF4            = 0.0;
+    }
+
+    bExactElecCutoff    = (fr->coulomb_modifier!=eintmodNONE) || fr->eeltype == eelRF_ZERO;
+    bExactVdwCutoff     = (fr->vdw_modifier!=eintmodNONE);
 
     /* fix compiler warnings */
     nj1   = 0;
@@ -144,8 +221,8 @@ gmx_nb_free_energy_kernel(int                  icoul,
 
     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
 
-    do_coultab = (icoul==enbcoulTAB);
-    do_vdwtab  = (ivdw==enbcoulTAB);
+    do_coultab = (icoul==GMX_NBKERNEL_ELEC_CUBICSPLINETABLE);
+    do_vdwtab  = (ivdw==GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
     
     do_tab = do_coultab || do_vdwtab;
     
@@ -154,11 +231,11 @@ gmx_nb_free_energy_kernel(int                  icoul,
     
     for(n=0; (n<nri); n++)
     {
-        is3              = 3*shift[n];     
+        is3              = 3*shift[n];
         shX              = shiftvec[is3];  
         shY              = shiftvec[is3+1];
         shZ              = shiftvec[is3+2];
-        nj0              = jindex[n];      
+        nj0              = jindex[n];
         nj1              = jindex[n+1];    
         ii               = iinr[n];        
         ii3              = 3*ii;           
@@ -209,15 +286,16 @@ gmx_nb_free_energy_kernel(int                  icoul,
             qq[STATE_A]      = iqA*chargeA[jnr];
             qq[STATE_B]      = iqB*chargeB[jnr];
 
-            for (i=0;i<NSTATES;i++) 
+            for (i=0;i<NSTATES;i++)
             {
 
                 c6[i]              = nbfp[tj[i]];
                 c12[i]             = nbfp[tj[i]+1];
                 if((c6[i] > 0) && (c12[i] > 0))
                 {
-                    sigma6[i]       = c12[i]/c6[i];
-                    sigma2[i]       = pow(c12[i]/c6[i],1.0/3.0);
+                    /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
+                    sigma6[i]       = 0.5*c12[i]/c6[i];
+                    sigma2[i]       = pow(sigma6[i],1.0/3.0);
                     /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
                        what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
                     if (sigma6[i] < sigma6_min) { /* for disappearing coul and vdw with soft core at the same time */
@@ -278,6 +356,9 @@ gmx_nb_free_energy_kernel(int                  icoul,
                     rinvV          = pow(rpinvV,1.0/sc_r_power);
                     rV             = 1.0/rinvV;
 
+                    factor_coul    = (rC<=rcoulomb) ? 1.0 : 0.0;
+                    factor_vdw     = (rV<=rvdw)     ? 1.0 : 0.0;
+
                     if (do_tab)
                     {
                         rtC        = rC*tabscale;
@@ -293,94 +374,158 @@ gmx_nb_free_energy_kernel(int                  icoul,
                         n1V        = tab_elemsize*n0;
                     }
 
-                    if(icoul==enbcoulOOR || icoul==enbcoulFEWALD)
+                    if(qq[i] != 0)
                     {
-                        /* simple cutoff */
-                        Vcoul[i]   = qq[i]*rinvC;
-                        FscalC[i]  = Vcoul[i]*rpinvC;
-                    }
-                    else if(icoul==enbcoulRF)
-                    {
-                        /* reaction-field */
-                        krsq       = krf*rC*rC;
-                        Vcoul[i]   = qq[i]*(rinvC+krsq-crf);
-                        FscalC[i]  = qq[i]*(rinvC-2.0*krsq)*rpinvC;
-                    }
-                    else if (icoul==enbcoulTAB)
-                    {
-                        /* non-Ewald tabulated coulomb */
-                        nnn        = n1C;
-                        Y          = VFtab[nnn];
-                        F          = VFtab[nnn+1];
-                        Geps       = epsC*VFtab[nnn+2];
-                        Heps2      = eps2C*VFtab[nnn+3];
-                        Fp         = F+Geps+Heps2;
-                        VV         = Y+epsC*Fp;
-                        FF         = Fp+Geps+2.0*Heps2;
-                        Vcoul[i]   = qq[i]*VV;
-                        FscalC[i]  = -qq[i]*tabscale*FF*rC*rpinvC;
+                        switch(icoul)
+                        {
+                            case GMX_NBKERNEL_ELEC_COULOMB:
+                            case GMX_NBKERNEL_ELEC_EWALD:
+                                /* simple cutoff (yes, ewald is done all on direct space for free energy) */
+                                Vcoul[i]   = qq[i]*rinvC;
+                                FscalC[i]  = Vcoul[i]*rpinvC;
+                                break;
+                                
+                            case GMX_NBKERNEL_ELEC_REACTIONFIELD:
+                                /* reaction-field */
+                                Vcoul[i]   = qq[i]*(rinvC+krf*rC*rC-crf);
+                                FscalC[i]  = qq[i]*(rinvC*rpinvC-2.0*krf);
+                                break;
+
+                            case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
+                                /* non-Ewald tabulated coulomb */
+                                nnn        = n1C;
+                                Y          = VFtab[nnn];
+                                F          = VFtab[nnn+1];
+                                Geps       = epsC*VFtab[nnn+2];
+                                Heps2      = eps2C*VFtab[nnn+3];
+                                Fp         = F+Geps+Heps2;
+                                VV         = Y+epsC*Fp;
+                                FF         = Fp+Geps+2.0*Heps2;
+                                Vcoul[i]   = qq[i]*VV;
+                                FscalC[i]  = -qq[i]*tabscale*FF*rC*rpinvC;
+                                break;
+
+                            default:
+                                FscalC[i]  = 0.0;
+                                Vcoul[i]   = 0.0;
+                                break;
+                        }
+
+                        if(fr->coulomb_modifier==eintmodPOTSWITCH)
+                        {
+                            d                = rC-rswitch;
+                            d                = (d>0.0) ? d : 0.0;
+                            d2               = d*d;
+                            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+                            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+                            Vcoul[i]        *= sw;
+                            FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw;
+                        }
+
+                        if(bExactElecCutoff)
+                        {
+                            FscalC[i]        = (rC<rcoulomb) ? FscalC[i] : 0.0;
+                            Vcoul[i]         = (rC<rcoulomb) ? Vcoul[i] : 0.0;
+                        }
                     }
 
-                    if(ivdw==enbvdwLJ)
+                    if((c6[i] != 0) || (c12[i] != 0))
                     {
-                        /* cutoff LJ */
-                        if (sc_r_power == 6.0)
+                        switch(ivdw)
                         {
-                            rinv6            = rpinvV;
+                            case GMX_NBKERNEL_VDW_LENNARDJONES:
+                                /* cutoff LJ */
+                                if (sc_r_power == 6.0)
+                                {
+                                    rinv6            = rpinvV;
+                                }
+                                else
+                                {
+                                    rinv6            = pow(rinvV,6.0);
+                                }
+                                Vvdw6            = c6[i]*rinv6;
+                                Vvdw12           = c12[i]*rinv6*rinv6;
+                                if(fr->vdw_modifier==eintmodPOTSHIFT)
+                                {
+                                    Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
+                                                        -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
+                                }
+                                else
+                                {
+                                    Vvdw[i]          = Vvdw12*(1.0/12.0)-Vvdw6*(1.0/6.0);
+                                }
+                                FscalV[i]        = (Vvdw12-Vvdw6)*rpinvV;
+                                break;
+
+                            case GMX_NBKERNEL_VDW_BUCKINGHAM:
+                                gmx_fatal(FARGS,"Buckingham free energy not supported.");
+                                break;
+
+                            case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
+                                /* Table LJ */
+                                nnn = n1V+4;
+                                /* dispersion */
+                                Y          = VFtab[nnn];
+                                F          = VFtab[nnn+1];
+                                Geps       = epsV*VFtab[nnn+2];
+                                Heps2      = eps2V*VFtab[nnn+3];
+                                Fp         = F+Geps+Heps2;
+                                VV         = Y+epsV*Fp;
+                                FF         = Fp+Geps+2.0*Heps2;
+                                Vvdw[i]   += c6[i]*VV;
+                                FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;
+
+                                /* repulsion */
+                                Y          = VFtab[nnn+4];
+                                F          = VFtab[nnn+5];
+                                Geps       = epsV*VFtab[nnn+6];
+                                Heps2      = eps2V*VFtab[nnn+7];
+                                Fp         = F+Geps+Heps2;
+                                VV         = Y+epsV*Fp;
+                                FF         = Fp+Geps+2.0*Heps2;
+                                Vvdw[i]   += c12[i]*VV;
+                                FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
+                                break;
+
+                            default:
+                                Vvdw[i]    = 0.0;
+                                FscalV[i]  = 0.0;
+                                break;
                         }
-                        else
+
+                        if(fr->vdw_modifier==eintmodPOTSWITCH)
                         {
-                            rinv6            = pow(rinvV,6.0);
+                            d                = rV-rswitch;
+                            d                = (d>0.0) ? d : 0.0;
+                            d2               = d*d;
+                            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+                            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+                            Vvdw[i]         *= sw;
+                            FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw;
+
+                            FscalV[i]        = (rV<rvdw) ? FscalV[i] : 0.0;
+                            Vvdw[i]          = (rV<rvdw) ? Vvdw[i] : 0.0;
                         }
-                        Vvdw6            = c6[i]*rinv6;
-                        Vvdw12           = c12[i]*rinv6*rinv6;
-                        Vvdw[i]          = Vvdw12-Vvdw6;
-                        FscalV[i]        = (12.0*Vvdw12-6.0*Vvdw6)*rpinvV;
                     }
-                    else if(ivdw==enbvdwTAB)
-                    {
-                        /* Table LJ */
-                        nnn = n1V+4;
-
-                        /* dispersion */
-                        Y          = VFtab[nnn];
-                        F          = VFtab[nnn+1];
-                        Geps       = epsV*VFtab[nnn+2];
-                        Heps2      = eps2V*VFtab[nnn+3];
-                        Fp         = F+Geps+Heps2;
-                        VV         = Y+epsV*Fp;
-                        FF         = Fp+Geps+2.0*Heps2;
-                        Vvdw[i]   += c6[i]*VV;
-                        FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;
-
-                        /* repulsion */
-                        Y          = VFtab[nnn+4];
-                        F          = VFtab[nnn+5];
-                        Geps       = epsV*VFtab[nnn+6];
-                        Heps2      = eps2V*VFtab[nnn+7];
-                        Fp         = F+Geps+Heps2;
-                        VV         = Y+epsV*Fp;
-                        FF         = Fp+Geps+2.0*Heps2;
-                        Vvdw[i]   += c12[i]*VV;
-                        FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
-                    }           
-                    /* Buckingham vdw free energy not supported for now */
                 }
             }
 
             Fscal = 0;
 
-            if (icoul==enbcoulFEWALD) {
+            if (icoul==GMX_NBKERNEL_ELEC_EWALD)
+            {
                 /* because we compute the softcore normally,
-                   we have to remove the ewald short range portion. Done outside of
-                   the states loop because this part doesn't depend on the scaled R */
+                 we have to remove the ewald short range portion. Done outside of
+                 the states loop because this part doesn't depend on the scaled R */
 
-                if (r != 0) 
+                if (r != 0)
                 {
                     VV    = gmx_erf(ewc*r)*rinv;
                     FF    = rinv*rinv*(VV - 2.0*ewc*isp*exp(-ewc*ewc*rsq));
                 }
-                else 
+                else
                 {
                     VV    = ewc*2.0/sqrt(M_PI);
                     FF    = 0;
@@ -437,6 +582,193 @@ gmx_nb_free_energy_kernel(int                  icoul,
 
     dvdl[efptCOUL]     += dvdl_coul;
     dvdl[efptVDW]      += dvdl_vdw;
-    *outeriter       = nri;            
-    *inneriter       = nj1;            
+
+    /* Estimate flops, average for free energy stuff:
+     * 12  flops per outer iteration
+     * 150 flops per inner iteration
+     */
+    inc_nrnb(nrnb,eNR_NBKERNEL_FREE_ENERGY,nlist->nri*12 + nlist->jindex[n]*150);
+}
+
+real
+nb_free_energy_evaluate_single(real r2,real sc_r_power,real alpha_coul,real alpha_vdw,
+                               real tabscale,real *vftab,
+                               real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
+                               real LFC[2], real LFV[2],real DLF[2],
+                               real lfac_coul[2], real lfac_vdw[2],real dlfac_coul[2], real dlfac_vdw[2],
+                               real sigma6_def, real sigma6_min,real sigma2_def, real sigma2_min,
+                               real *velectot, real *vvdwtot, real *dvdl)
+{
+    real       r,rp,rpm2,rtab,eps,eps2,Y,F,Geps,Heps2,Fp,VV,FF,fscal;
+    real       qq[2],c6[2],c12[2],sigma6[2],sigma2[2],sigma_pow[2],sigma_powm2[2];
+    real       alpha_coul_eff,alpha_vdw_eff,dvdl_coul,dvdl_vdw;
+    real       rpinv,r_coul,r_vdw,velecsum,vvdwsum;
+    real       fscal_vdw[2],fscal_elec[2];
+    real       velec[2],vvdw[2];
+    int        i,ntab;
+
+    qq[0]    = qqA;
+    qq[1]    = qqB;
+    c6[0]    = c6A;
+    c6[1]    = c6B;
+    c12[0]   = c12A;
+    c12[1]   = c12B;
+
+    if (sc_r_power == 6.0)
+    {
+        rpm2             = r2*r2; /* r4 */
+        rp               = rpm2*r2; /* r6 */
+    }
+    else if (sc_r_power == 48.0)
+    {
+        rp               = r2*r2*r2;  /* r6 */
+        rp               = rp*rp; /* r12 */
+        rp               = rp*rp; /* r24 */
+        rp               = rp*rp; /* r48 */
+        rpm2             = rp/r2; /* r46 */
+    }
+    else
+    {
+        rp             = pow(r2,0.5*sc_r_power);  /* not currently supported as input, but can handle it */
+        rpm2           = rp/r2;
+    }
+
+    /* Loop over state A(0) and B(1) */
+    for (i=0;i<2;i++)
+    {
+        if((c6[i] > 0) && (c12[i] > 0))
+        {
+            sigma6[i]       = c12[i]/c6[i];
+            sigma2[i]       = pow(c12[i]/c6[i],1.0/3.0);
+            /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
+             what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
+            if (sigma6[i] < sigma6_min) { /* for disappearing coul and vdw with soft core at the same time */
+                sigma6[i] = sigma6_min;
+                sigma2[i] = sigma2_min;
+            }
+        }
+        else
+        {
+            sigma6[i]       = sigma6_def;
+            sigma2[i]       = sigma2_def;
+        }
+        if (sc_r_power == 6.0)
+        {
+            sigma_pow[i]    = sigma6[i];
+            sigma_powm2[i]  = sigma6[i]/sigma2[i];
+        }
+        else if (sc_r_power == 48.0)
+        {
+            sigma_pow[i]    = sigma6[i]*sigma6[i];   /* sigma^12 */
+            sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
+            sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
+            sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
+        }
+        else
+        {    /* not really supported as input, but in here for testing the general case*/
+            sigma_pow[i]    = pow(sigma2[i],sc_r_power/2);
+            sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
+        }
+    }
+
+    /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
+    if ((c12[0] > 0) && (c12[1] > 0)) {
+        alpha_vdw_eff    = 0;
+        alpha_coul_eff   = 0;
+    } else {
+        alpha_vdw_eff    = alpha_vdw;
+        alpha_coul_eff   = alpha_coul;
+    }
+
+    /* Loop over A and B states again */
+    for (i=0;i<2;i++)
+    {
+        fscal_elec[i] = 0;
+        fscal_vdw[i]  = 0;
+        velec[i]      = 0;
+        vvdw[i]       = 0;
+
+        /* Only spend time on A or B state if it is non-zero */
+        if( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
+        {
+            /* Coulomb */
+            rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
+            r_coul           = pow(rpinv,-1.0/sc_r_power);
+
+            /* Electrostatics table lookup data */
+            rtab             = r_coul*tabscale;
+            ntab             = rtab;
+            eps              = rtab-ntab;
+            eps2             = eps*eps;
+            ntab             = 12*ntab;
+            /* Electrostatics */
+            Y                = vftab[ntab];
+            F                = vftab[ntab+1];
+            Geps             = eps*vftab[ntab+2];
+            Heps2            = eps2*vftab[ntab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+eps*Fp;
+            FF               = Fp+Geps+2.0*Heps2;
+            velec[i]         = qq[i]*VV;
+            fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
+
+            /* Vdw */
+            rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
+            r_vdw            = pow(rpinv,-1.0/sc_r_power);
+            /* Vdw table lookup data */
+            rtab             = r_vdw*tabscale;
+            ntab             = rtab;
+            eps              = rtab-ntab;
+            eps2             = eps*eps;
+            ntab             = 12*ntab;
+            /* Dispersion */
+            Y                = vftab[ntab+4];
+            F                = vftab[ntab+5];
+            Geps             = eps*vftab[ntab+6];
+            Heps2            = eps2*vftab[ntab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+eps*Fp;
+            FF               = Fp+Geps+2.0*Heps2;
+            vvdw[i]          = c6[i]*VV;
+            fscal_vdw[i]     = -c6[i]*FF;
+
+            /* Repulsion */
+            Y                = vftab[ntab+8];
+            F                = vftab[ntab+9];
+            Geps             = eps*vftab[ntab+10];
+            Heps2            = eps2*vftab[ntab+11];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+eps*Fp;
+            FF               = Fp+Geps+2.0*Heps2;
+            vvdw[i]         += c12[i]*VV;
+            fscal_vdw[i]    -= c12[i]*FF;
+            fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
+        }
+    }
+    /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
+    /* Assemble A and B states */
+    velecsum  = 0;
+    vvdwsum   = 0;
+    dvdl_coul = 0;
+    dvdl_vdw  = 0;
+    fscal     = 0;
+    for (i=0;i<2;i++)
+    {
+        velecsum      += LFC[i]*velec[i];
+        vvdwsum       += LFV[i]*vvdw[i];
+
+        fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
+
+        dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
+        dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
+    }
+
+    dvdl[efptCOUL]     += dvdl_coul;
+    dvdl[efptVDW]      += dvdl_vdw;
+
+    *velectot           = velecsum;
+    *vvdwtot            = vvdwsum;
+
+    return fscal;
 }
+
index 53696012324e4c7334e7b3aa2a568d7b44667975..4e7c63805a70f3f3258529c9ac758fef3443ef5b 100644 (file)
 #ifndef _nb_free_energy_h_
 #define _nb_free_energy_h_
 
+#include "nb_kernel.h"
 #include <typedefs.h>
 
 void
-gmx_nb_free_energy_kernel(int                  icoul,
-                          int                  ivdw,
-                          int                  nri,
-                          int *                iinr,
-                          int *                jindex,
-                          int *                jjnr,
-                          int *                shift,
-                          real *               shiftvec,
-                          real *               fshift,
-                          int *                gid,
-                          real *               x,
-                          real *               f,
-                          real *               chargeA,
-                          real *               chargeB,
-                          real                 facel,
-                          real                 krf,
-                          real                 crf,
-                          real                 ewc,
-                          real *               Vc,
-                          int *                typeA,
-                          int *                typeB,
-                          int                  ntype,
-                          real *               nbfp,
-                          real *               Vvdw,
-                          real                 tabscale,
-                          real *               VFtab,
-                          real                 lambda_coul,
-                          real                 lambda_vdw,
-                          real *               dvdl,
-                          real                 alpha_coul,
-                          real                 alpha_vdw,
-                          int                  lam_power,
-                          real                 sc_r_power,
-                          real                 sigma6_def,
-                          real                 sigma6_min,
-                          gmx_bool             bDoForces,
-                          int *                outeriter,
-                          int *                inneriter);
+gmx_nb_free_energy_kernel(t_nblist *                nlist,
+                          rvec *                    x,
+                          rvec *                    f,
+                          t_forcerec *              fr,
+                          t_mdatoms *               mdatoms,
+                          nb_kernel_data_t *        kernel_data,
+                          t_nrnb *                  nrnb);
+
+real
+nb_free_energy_evaluate_single(real r2,real sc_r_power,real alpha_coul,
+                               real alpha_vdw,real tabscale,real *vftab,
+                               real qqA, real c6A, real c12A, real qqB,
+                               real c6B, real c12B,real LFC[2], real LFV[2],real DLF[2],
+                               real lfac_coul[2], real lfac_vdw[2],real dlfac_coul[2],
+                               real dlfac_vdw[2],real sigma6_def, real sigma6_min,
+                               real sigma2_def, real sigma2_min,
+                               real *velectot, real *vvdwtot, real *dvdl);
 
 #endif
 
index 02a0ee8ab1464e7621cc03a485529ac0b6a65243..b052049de291fdada83b9fe78659bb95e53257ec 100644 (file)
 #include "vec.h"
 #include "typedefs.h"
 #include "nb_generic.h"
+#include "nrnb.h"
+
+#include "nonbonded.h"
+#include "nb_kernel.h"
+
 
 void
-gmx_nb_generic_kernel(t_nblist *           nlist,
-                                         t_forcerec *         fr,
-                                         t_mdatoms *          mdatoms,
-                                         real *               x,
-                                         real *               f,
-                                         real *               fshift,
-                                         real *               Vc,
-                                         real *               Vvdw,
-                                         real                 tabscale,  
-                                         real *               VFtab,
-                                         int *                outeriter,
-                                         int *                inneriter)
+gmx_nb_generic_kernel(t_nblist *                nlist,
+                      rvec *                    xx,
+                      rvec *                    ff,
+                      t_forcerec *              fr,
+                      t_mdatoms *               mdatoms,
+                      nb_kernel_data_t *        kernel_data,
+                      t_nrnb *                  nrnb)
 {
-    int           nri,ntype,table_nelements,icoul,ivdw;
+    int           nri,ntype,table_nelements,ielec,ivdw;
     real          facel,gbtabscale;
     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid,nnn,n0;
     real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
+    real          fscal,felec,fvdw,velec,vvdw,tx,ty,tz;
     real          rinvsq;
     real          iq;
-    real          qq,vcoul,krsq,vctot;
+    real          qq,vctot;
     int           nti,nvdwparam;
     int           tj;
-       real          rt,r,eps,eps2,Y,F,Geps,Heps2,VV,FF,Fp,fijD,fijR;
+    real          rt,r,eps,eps2,Y,F,Geps,Heps2,VV,FF,Fp,fijD,fijR;
     real          rinvsix;
-    real          Vvdwtot;
-    real          Vvdw_rep,Vvdw_disp;
+    real          vvdwtot;
+    real          vvdw_rep,vvdw_disp;
     real          ix,iy,iz,fix,fiy,fiz;
     real          jx,jy,jz;
     real          dx,dy,dz,rsq,rinv;
     real          c6,c12,cexp1,cexp2,br;
-       real *        charge;
-       real *        shiftvec;
-       real *        vdwparam;
-       int *         shift;
-       int *         type;
-       
-       icoul               = nlist->icoul;
-       ivdw                = nlist->ivdw;
-
-       /* avoid compiler warnings for cases that cannot happen */
-       nnn                 = 0;
-       vcoul               = 0.0;
-       eps                 = 0.0;
-       eps2                = 0.0;
-       
-       /* 3 VdW parameters for buckingham, otherwise 2 */
-       nvdwparam           = (nlist->ivdw==2) ? 3 : 2;
-       table_nelements     = (icoul==3) ? 4 : 0;
-       table_nelements    += (ivdw==3) ? 8 : 0;
-         
+    real *        charge;
+    real *        shiftvec;
+    real *        vdwparam;
+    int *         shift;
+    int *         type;
+    real *        fshift;
+    real *        velecgrp;
+    real *        vvdwgrp;
+    real          tabscale;
+    real *        VFtab;
+    real *        x;
+    real *        f;
+    int           ewitab;
+    real          ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real *        ewtab;
+    real          rcoulomb2,rvdw,rvdw2,sh_invrc6;
+    real          rcutoff,rcutoff2;
+    real          rswitch_elec,rswitch_vdw,d,d2,sw,dsw,rinvcorr;
+    real          elec_swV3,elec_swV4,elec_swV5,elec_swF2,elec_swF3,elec_swF4;
+    real          vdw_swV3,vdw_swV4,vdw_swV5,vdw_swF2,vdw_swF3,vdw_swF4;
+    gmx_bool      bExactElecCutoff,bExactVdwCutoff,bExactCutoff;
+
+    x                   = xx[0];
+    f                   = ff[0];
+    ielec               = nlist->ielec;
+    ivdw                = nlist->ivdw;
+
+    fshift              = fr->fshift[0];
+    velecgrp            = kernel_data->energygrp_elec;
+    vvdwgrp             = kernel_data->energygrp_vdw;
+    tabscale            = kernel_data->table_elec_vdw->scale;
+    VFtab               = kernel_data->table_elec_vdw->data;
+
+    sh_ewald            = fr->ic->sh_ewald;
+    ewtab               = fr->ic->tabq_coul_FDV0;
+    ewtabscale          = fr->ic->tabq_scale;
+    ewtabhalfspace      = 0.5/ewtabscale;
+
+    rcoulomb2           = fr->rcoulomb*fr->rcoulomb;
+    rvdw                = fr->rvdw;
+    rvdw2               = rvdw*rvdw;
+    sh_invrc6           = fr->ic->sh_invrc6;
+
+    if(fr->coulomb_modifier==eintmodPOTSWITCH)
+    {
+        d               = fr->rcoulomb-fr->rcoulomb_switch;
+        elec_swV3       = -10.0/(d*d*d);
+        elec_swV4       =  15.0/(d*d*d*d);
+        elec_swV5       =  -6.0/(d*d*d*d*d);
+        elec_swF2       = -30.0/(d*d*d);
+        elec_swF3       =  60.0/(d*d*d*d);
+        elec_swF4       = -30.0/(d*d*d*d*d);
+    }
+    else
+    {
+        /* Avoid warnings from stupid compilers (looking at you, Clang!) */
+        elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0;
+    }
+    if(fr->vdw_modifier==eintmodPOTSWITCH)
+    {
+        d               = fr->rvdw-fr->rvdw_switch;
+        vdw_swV3        = -10.0/(d*d*d);
+        vdw_swV4        =  15.0/(d*d*d*d);
+        vdw_swV5        =  -6.0/(d*d*d*d*d);
+        vdw_swF2        = -30.0/(d*d*d);
+        vdw_swF3        =  60.0/(d*d*d*d);
+        vdw_swF4        = -30.0/(d*d*d*d*d);
+    }
+    else
+    {
+        /* Avoid warnings from stupid compilers (looking at you, Clang!) */
+        vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
+    }
+
+    bExactElecCutoff    = (fr->coulomb_modifier!=eintmodNONE) || fr->eeltype == eelRF_ZERO;
+    bExactVdwCutoff     = (fr->vdw_modifier!=eintmodNONE);
+    bExactCutoff        = bExactElecCutoff || bExactVdwCutoff;
+
+    if(bExactCutoff)
+    {
+        rcutoff  = ( fr->rcoulomb > fr->rvdw ) ? fr->rcoulomb : fr->rvdw;
+        rcutoff2 = rcutoff*rcutoff;
+    }
+    else
+    {
+        /* Fix warnings for stupid compilers */
+        rcutoff = rcutoff2 = 1e30;
+    }
+
+    /* avoid compiler warnings for cases that cannot happen */
+    nnn                 = 0;
+    eps                 = 0.0;
+    eps2                = 0.0;
+
+    /* 3 VdW parameters for buckingham, otherwise 2 */
+    nvdwparam           = (ivdw==GMX_NBKERNEL_VDW_BUCKINGHAM) ? 3 : 2;
+    table_nelements     = 12;
+
     charge              = mdatoms->chargeA;
-       type                = mdatoms->typeA;
-       facel               = fr->epsfac;
-       shiftvec            = fr->shift_vec[0];
-       vdwparam            = fr->nbfp;
-       ntype               = fr->ntype;
-       
-       for(n=0; (n<nlist->nri); n++)
+    type                = mdatoms->typeA;
+    facel               = fr->epsfac;
+    shiftvec            = fr->shift_vec[0];
+    vdwparam            = fr->nbfp;
+    ntype               = fr->ntype;
+
+    for(n=0; (n<nlist->nri); n++)
     {
-        is3              = 3*nlist->shift[n];     
+        is3              = 3*nlist->shift[n];
         shX              = shiftvec[is3];  
         shY              = shiftvec[is3+1];
         shZ              = shiftvec[is3+2];
@@ -118,9 +196,9 @@ gmx_nb_generic_kernel(t_nblist *           nlist,
         iq               = facel*charge[ii];
         nti              = nvdwparam*ntype*type[ii];
         vctot            = 0;              
-        Vvdwtot          = 0;              
-        fix              = 0;              
-        fiy              = 0;              
+        vvdwtot          = 0;
+        fix              = 0;
+        fiy              = 0;
         fiz              = 0;              
         
         for(k=nj0; (k<nj1); k++)
@@ -136,156 +214,223 @@ gmx_nb_generic_kernel(t_nblist *           nlist,
             rsq              = dx*dx+dy*dy+dz*dz;
             rinv             = gmx_invsqrt(rsq);
             rinvsq           = rinv*rinv;  
-                       fscal            = 0;
-                       
-                       if(icoul==3 || ivdw==3)
-                       {
-                               r                = rsq*rinv;
-                               rt               = r*tabscale;     
-                               n0               = rt;             
-                               eps              = rt-n0;          
-                               eps2             = eps*eps;        
-                               nnn              = table_nelements*n0;                                          
-                       }
-                       
-                       /* Coulomb interaction. icoul==0 means no interaction */
-                       if(icoul>0)
-                       {
-                               qq               = iq*charge[jnr]; 
-
-                               switch(icoul)
-                               {
-                                       case 1:
-                                               /* Vanilla cutoff coulomb */
-                                               vcoul            = qq*rinv;      
-                                               fscal            = vcoul*rinvsq; 
-                                               break;
-
-                                       case 2:
-                                               /* Reaction-field */
-                                               krsq             = fr->k_rf*rsq;      
-                                               vcoul            = qq*(rinv+krsq-fr->c_rf);
-                                               fscal            = qq*(rinv-2.0*krsq)*rinvsq;
-                                               break;
-
-                                       case 3:
-                                               /* Tabulated coulomb */
-                                               Y                = VFtab[nnn];     
-                                               F                = VFtab[nnn+1];   
-                                               Geps             = eps*VFtab[nnn+2];
-                                               Heps2            = eps2*VFtab[nnn+3];
-                                               nnn             += 4;
-                                               Fp               = F+Geps+Heps2;   
-                                               VV               = Y+eps*Fp;       
-                                               FF               = Fp+Geps+2.0*Heps2;
-                                               vcoul            = qq*VV;          
-                                               fscal            = -qq*FF*tabscale*rinv;
-                                               break;
-                                       
-                                       case 4:
-                                               /* GB */
-                                               gmx_fatal(FARGS,"Death & horror! GB generic interaction not implemented.\n");
-                                               break;
-                                       
-                                       default:
-                                               gmx_fatal(FARGS,"Death & horror! No generic coulomb interaction for icoul=%d.\n",icoul);
-                                               break;
-                               }
-                               vctot            = vctot+vcoul;    
-                       } /* End of coulomb interactions */
-                       
-                       
-                       /* VdW interaction. ivdw==0 means no interaction */
-                       if(ivdw>0)
-                       {
-                               tj               = nti+nvdwparam*type[jnr];
-                               
-                               switch(ivdw)
-                               {
-                                       case 1:
-                                               /* Vanilla Lennard-Jones cutoff */
-                                               c6               = vdwparam[tj];   
-                                               c12              = vdwparam[tj+1]; 
-                                               
-                                               rinvsix          = rinvsq*rinvsq*rinvsq;
-                                               Vvdw_disp        = c6*rinvsix;     
-                                               Vvdw_rep         = c12*rinvsix*rinvsix;
-                                               fscal           += (12.0*Vvdw_rep-6.0*Vvdw_disp)*rinvsq;
-                                               Vvdwtot          = Vvdwtot+Vvdw_rep-Vvdw_disp;
-                                               break;
-                                               
-                                       case 2:
-                                               /* Buckingham */
-                                               c6               = vdwparam[tj];   
-                                               cexp1            = vdwparam[tj+1]; 
-                                               cexp2            = vdwparam[tj+2]; 
-                                               
-                                               rinvsix          = rinvsq*rinvsq*rinvsq;
-                                               Vvdw_disp        = c6*rinvsix;     
-                                               br               = cexp2*rsq*rinv;
-                                               Vvdw_rep         = cexp1*exp(-br); 
-                                               fscal           += (br*Vvdw_rep-6.0*Vvdw_disp)*rinvsq;
-                                               Vvdwtot          = Vvdwtot+Vvdw_rep-Vvdw_disp;
-                                               break;
-                                               
-                                       case 3:
-                                               /* Tabulated VdW */
-                                               c6               = vdwparam[tj];   
-                                               c12              = vdwparam[tj+1]; 
-
-                                               Y                = VFtab[nnn];     
-                                               F                = VFtab[nnn+1];   
-                                               Geps             = eps*VFtab[nnn+2];
-                                               Heps2            = eps2*VFtab[nnn+3];
-                                               Fp               = F+Geps+Heps2;   
-                                               VV               = Y+eps*Fp;       
-                                               FF               = Fp+Geps+2.0*Heps2;
-                                               Vvdw_disp        = c6*VV;          
-                                               fijD             = c6*FF;          
-                                               nnn             += 4;          
-                                               Y                = VFtab[nnn];     
-                                               F                = VFtab[nnn+1];   
-                                               Geps             = eps*VFtab[nnn+2];
-                                               Heps2            = eps2*VFtab[nnn+3];
-                                               Fp               = F+Geps+Heps2;   
-                                               VV               = Y+eps*Fp;       
-                                               FF               = Fp+Geps+2.0*Heps2;
-                                               Vvdw_rep         = c12*VV;         
-                                               fijR             = c12*FF;         
-                                               fscal           += -(fijD+fijR)*tabscale*rinv;
-                                               Vvdwtot          = Vvdwtot + Vvdw_disp + Vvdw_rep;                                              
-                                               break;
-                                                                                               
-                                       default:
-                                               gmx_fatal(FARGS,"Death & horror! No generic VdW interaction for ivdw=%d.\n",ivdw);
-                                               break;
-                               }
-                       } /* end VdW interactions */
-                       
-                       
-            tx               = fscal*dx;     
-            ty               = fscal*dy;     
-            tz               = fscal*dz;     
-            fix              = fix + tx;      
-            fiy              = fiy + ty;      
-            fiz              = fiz + tz;      
+            felec            = 0;
+            fvdw             = 0;
+            velec            = 0;
+            vvdw             = 0;
+
+            if(bExactCutoff && rsq>rcutoff2)
+            {
+                continue;
+            }
+
+            if(ielec==GMX_NBKERNEL_ELEC_CUBICSPLINETABLE || ivdw==GMX_NBKERNEL_VDW_CUBICSPLINETABLE)
+            {
+                r                = rsq*rinv;
+                rt               = r*tabscale;
+                n0               = rt;
+                eps              = rt-n0;
+                eps2             = eps*eps;
+                nnn              = table_nelements*n0;
+            }
+
+            /* Coulomb interaction. ielec==0 means no interaction */
+            if(ielec!=GMX_NBKERNEL_ELEC_NONE)
+            {
+                qq               = iq*charge[jnr];
+
+                switch(ielec)
+                {
+                    case GMX_NBKERNEL_ELEC_NONE:
+                        break;
+
+                    case GMX_NBKERNEL_ELEC_COULOMB:
+                        /* Vanilla cutoff coulomb */
+                        velec            = qq*rinv;
+                        felec            = velec*rinvsq;
+                        break;
+
+                    case GMX_NBKERNEL_ELEC_REACTIONFIELD:
+                        /* Reaction-field */
+                        velec            = qq*(rinv+fr->k_rf*rsq-fr->c_rf);
+                        felec            = qq*(rinv*rinvsq-2.0*fr->k_rf);
+                        break;
+
+                    case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
+                        /* Tabulated coulomb */
+                        Y                = VFtab[nnn];
+                        F                = VFtab[nnn+1];
+                        Geps             = eps*VFtab[nnn+2];
+                        Heps2            = eps2*VFtab[nnn+3];
+                        Fp               = F+Geps+Heps2;
+                        VV               = Y+eps*Fp;
+                        FF               = Fp+Geps+2.0*Heps2;
+                        velec            = qq*VV;
+                        felec            = -qq*FF*tabscale*rinv;
+                        break;
+
+                    case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
+                        /* GB */
+                        gmx_fatal(FARGS,"Death & horror! GB generic interaction not implemented.\n");
+                        break;
+
+                    case GMX_NBKERNEL_ELEC_EWALD:
+                        ewrt             = rsq*rinv*ewtabscale;
+                        ewitab           = ewrt;
+                        eweps            = ewrt-ewitab;
+                        ewitab           = 4*ewitab;
+                        felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+                        rinvcorr         = (fr->coulomb_modifier==eintmodPOTSHIFT) ? rinv-fr->ic->sh_ewald : rinv;
+                        velec            = qq*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+                        felec            = qq*rinv*(rinvsq-felec);
+                        break;
+
+                    default:
+                        gmx_fatal(FARGS,"Death & horror! No generic coulomb interaction for ielec=%d.\n",ielec);
+                        break;
+                }
+                if(fr->coulomb_modifier==eintmodPOTSWITCH)
+                {
+                    d                = rsq*rinv-fr->rcoulomb_switch;
+                    d                = (d>0.0) ? d : 0.0;
+                    d2               = d*d;
+                    sw               = 1.0+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));
+                    dsw              = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));
+                    velec           *= sw;
+                    felec            = felec*sw + velec*dsw;
+                }
+                if(bExactElecCutoff)
+                {
+                    felec            = (rsq<=rcoulomb2) ? felec : 0.0;
+                    velec            = (rsq<=rcoulomb2) ? velec : 0.0;
+                }
+                vctot           += velec;
+            } /* End of coulomb interactions */
+
+
+            /* VdW interaction. ivdw==0 means no interaction */
+            if(ivdw!=GMX_NBKERNEL_VDW_NONE)
+            {
+                tj               = nti+nvdwparam*type[jnr];
+
+                switch(ivdw)
+                {
+                    case GMX_NBKERNEL_VDW_NONE:
+                        break;
+
+                    case GMX_NBKERNEL_VDW_LENNARDJONES:
+                        /* Vanilla Lennard-Jones cutoff */
+                        c6               = vdwparam[tj];
+                        c12              = vdwparam[tj+1];
+                        rinvsix          = rinvsq*rinvsq*rinvsq;
+                        vvdw_disp        = c6*rinvsix;
+                        vvdw_rep         = c12*rinvsix*rinvsix;
+                        fvdw             = (vvdw_rep-vvdw_disp)*rinvsq;
+                        if(fr->vdw_modifier==eintmodPOTSHIFT)
+                        {
+                            vvdw             = (vvdw_rep-c12*sh_invrc6*sh_invrc6)*(1.0/12.0)-(vvdw_disp-c6*sh_invrc6)*(1.0/6.0);
+                        }
+                        else
+                        {
+                            vvdw             = vvdw_rep/12.0-vvdw_disp/6.0;
+                        }
+                        break;
+
+                    case GMX_NBKERNEL_VDW_BUCKINGHAM:
+                        /* Buckingham */
+                        c6               = vdwparam[tj];
+                        cexp1            = vdwparam[tj+1];
+                        cexp2            = vdwparam[tj+2];
+
+                        rinvsix          = rinvsq*rinvsq*rinvsq;
+                        vvdw_disp        = c6*rinvsix;
+                        br               = cexp2*rsq*rinv;
+                        vvdw_rep         = cexp1*exp(-br);
+                        fvdw             = (br*vvdw_rep-vvdw_disp)*rinvsq;
+                        if(fr->vdw_modifier==eintmodPOTSHIFT)
+                        {
+                            vvdw             = (vvdw_rep-cexp1*exp(-cexp2*rvdw))-(vvdw_disp-c6*sh_invrc6)/6.0;
+                        }
+                        else
+                        {
+                            vvdw             = vvdw_rep-vvdw_disp/6.0;
+                        }
+                        break;
+
+                    case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
+                        /* Tabulated VdW */
+                        c6               = vdwparam[tj];
+                        c12              = vdwparam[tj+1];
+                        Y                = VFtab[nnn+4];
+                        F                = VFtab[nnn+5];
+                        Geps             = eps*VFtab[nnn+6];
+                        Heps2            = eps2*VFtab[nnn+7];
+                        Fp               = F+Geps+Heps2;
+                        VV               = Y+eps*Fp;
+                        FF               = Fp+Geps+2.0*Heps2;
+                        vvdw_disp        = c6*VV;
+                        fijD             = c6*FF;
+                        Y                = VFtab[nnn+8];
+                        F                = VFtab[nnn+9];
+                        Geps             = eps*VFtab[nnn+10];
+                        Heps2            = eps2*VFtab[nnn+11];
+                        Fp               = F+Geps+Heps2;
+                        VV               = Y+eps*Fp;
+                        FF               = Fp+Geps+2.0*Heps2;
+                        vvdw_rep         = c12*VV;
+                        fijR             = c12*FF;
+                        fvdw             = -(fijD+fijR)*tabscale*rinv;
+                        vvdw             = vvdw_disp + vvdw_rep;
+                        break;
+
+                    default:
+                        gmx_fatal(FARGS,"Death & horror! No generic VdW interaction for ivdw=%d.\n",ivdw);
+                        break;
+                }
+                if(fr->vdw_modifier==eintmodPOTSWITCH)
+                {
+                    d                = rsq*rinv-fr->rvdw_switch;
+                    d                = (d>0.0) ? d : 0.0;
+                    d2               = d*d;
+                    sw               = 1.0+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
+                    dsw              = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
+                    vvdw            *= sw;
+                    fvdw             = fvdw*sw + vvdw*dsw;
+                }
+                if(bExactVdwCutoff)
+                {
+                    fvdw             = (rsq<=rvdw2) ? fvdw : 0.0;
+                    vvdw             = (rsq<=rvdw2) ? vvdw : 0.0;
+                }
+                vvdwtot         += vvdw;
+            } /* end VdW interactions */
+
+            fscal            = felec+fvdw;
+
+            tx               = fscal*dx;
+            ty               = fscal*dy;
+            tz               = fscal*dz;
+            fix              = fix + tx;
+            fiy              = fiy + ty;
+            fiz              = fiz + tz;
             f[j3+0]          = f[j3+0] - tx;
             f[j3+1]          = f[j3+1] - ty;
             f[j3+2]          = f[j3+2] - tz;
         }
-        
+
         f[ii3+0]         = f[ii3+0] + fix;
         f[ii3+1]         = f[ii3+1] + fiy;
         f[ii3+2]         = f[ii3+2] + fiz;
         fshift[is3]      = fshift[is3]+fix;
         fshift[is3+1]    = fshift[is3+1]+fiy;
         fshift[is3+2]    = fshift[is3+2]+fiz;
-        ggid             = nlist->gid[n];         
-        Vc[ggid]         = Vc[ggid] + vctot;
-        Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
+        ggid             = nlist->gid[n];
+        velecgrp[ggid]  += vctot;
+        vvdwgrp[ggid]   += vvdwtot;
     }
-    
-    *outeriter       = nlist->nri;            
-    *inneriter       = nlist->jindex[n];               
+    /* Estimate flops, average for generic kernel:
+     * 12 flops per outer iteration
+     * 50 flops per inner iteration
+     */
+    inc_nrnb(nrnb,eNR_NBKERNEL_GENERIC,nlist->nri*12 + nlist->jindex[n]*50);
 }
-
index fc44051cbc670468e940035bbd4ef2e426b02f65..845ca5699767f74a0c8674f03dfdb4a2acfef3ad 100644 (file)
 #ifndef _nb_generic_h_
 #define _nb_generic_h_
 
+#include "nb_kernel.h"
 #include "types/simple.h"
 #include "typedefs.h"
 
 void
-gmx_nb_generic_kernel(t_nblist *           nlist,
-                                         t_forcerec *         fr,
-                                         t_mdatoms *          mdatoms,
-                                         real *               x,
-                                         real *               f,
-                                         real *               fshift,
-                                         real *               Vc,
-                                         real *               Vvdw,
-                                         real                 tabscale,  
-                                         real *               VFtab,
-                                         int *                outeriter,
-                                         int *                inneriter);
+gmx_nb_generic_kernel(t_nblist *                nlist,
+                      rvec *                    x,
+                      rvec *                    f,
+                      t_forcerec *              fr,
+                      t_mdatoms *               mdatoms,
+                      nb_kernel_data_t *        kernel_data,
+                      t_nrnb *                  nrnb);
+
 
 #endif
 
index 052d3f2e756c31dd887830ec772e70687df5fcfa..364a568b1c700bdc049b457256eff49a72673b07 100644 (file)
 #define ALMOST_ONE 1-(1e-30)
 void
 gmx_nb_generic_adress_kernel(t_nblist *           nlist,
-                                         t_forcerec *         fr,
-                                         t_mdatoms *          mdatoms,
-                                         real *               x,
-                                         real *               f,
-                                         real *               fshift,
-                                         real *               Vc,
-                                         real *               Vvdw,
-                                         real                 tabscale,
-                                         real *               VFtab,
-                                         int *                outeriter,
-                                         int *                inneriter,
-                                          gmx_bool                bCG)
+                             t_forcerec *         fr,
+                             t_mdatoms *          mdatoms,
+                             real *               x,
+                             real *               f,
+                             real *               fshift,
+                             real *               Vc,
+                             real *               Vvdw,
+                             real                 tabscale,
+                             real *               VFtab,
+                             int *                outeriter,
+                             int *                inneriter,
+                             gmx_bool                bCG)
 {
-    int           nri,ntype,table_nelements,icoul,ivdw;
+    int           nri,ntype,table_nelements,ielec,ivdw;
     real          facel,gbtabscale;
     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid,nnn,n0;
     real          shX,shY,shZ;
@@ -96,7 +96,7 @@ gmx_nb_generic_adress_kernel(t_nblist *           nlist,
 
     force_cap = fr->adress_ex_forcecap;
 
-    icoul               = nlist->icoul;
+    ielec               = nlist->ielec;
     ivdw                = nlist->ivdw;
 
     /* avoid compiler warnings for cases that cannot happen */
@@ -107,7 +107,7 @@ gmx_nb_generic_adress_kernel(t_nblist *           nlist,
 
     /* 3 VdW parameters for buckingham, otherwise 2 */
     nvdwparam           = (nlist->ivdw==2) ? 3 : 2;
-    table_nelements     = (icoul==3) ? 4 : 0;
+    table_nelements     = (ielec==3) ? 4 : 0;
     table_nelements    += (ivdw==3) ? 8 : 0;
 
     charge              = mdatoms->chargeA;
@@ -205,72 +205,71 @@ gmx_nb_generic_adress_kernel(t_nblist *           nlist,
             rinvsq           = rinv*rinv;
 
 
-                       fscal            = 0;
+            fscal            = 0;
 
-                       if(icoul==3 || ivdw==3)
-                       {
-                               r                = rsq*rinv;
-                               rt               = r*tabscale;
-                               n0               = rt;
-                               eps              = rt-n0;
-                               eps2             = eps*eps;
-                               nnn              = table_nelements*n0;
-                       }
-
-                       /* Coulomb interaction. icoul==0 means no interaction */
-                       if(icoul>0)
-                       {
-                               qq               = iq*charge[jnr];
-
-                               switch(icoul)
-                               {
-                                       case 1:
-                                               /* Vanilla cutoff coulomb */
-                                               vcoul            = qq*rinv;
-                                               fscal            = vcoul*rinvsq;
-                                               break;
-
-                                       case 2:
-                                               /* Reaction-field */
-                                               krsq             = fr->k_rf*rsq;
-                                               vcoul            = qq*(rinv+krsq-fr->c_rf);
-                                               fscal            = qq*(rinv-2.0*krsq)*rinvsq;
-                                               break;
-
-                                       case 3:
-                                               /* Tabulated coulomb */
-                                               Y                = VFtab[nnn];
-                                               F                = VFtab[nnn+1];
-                                               Geps             = eps*VFtab[nnn+2];
-                                               Heps2            = eps2*VFtab[nnn+3];
-                                               nnn             += 4;
-                                               Fp               = F+Geps+Heps2;
-                                               VV               = Y+eps*Fp;
-                                               FF               = Fp+Geps+2.0*Heps2;
-                                               vcoul            = qq*VV;
-                                               fscal            = -qq*FF*tabscale*rinv;
-                                               break;
-
-                                       case 4:
-                                               /* GB */
-                                               gmx_fatal(FARGS,"Death & horror! GB generic interaction not implemented.\n");
-                                               break;
+            if(ielec==3 || ivdw==3)
+            {
+                r                = rsq*rinv;
+                rt               = r*tabscale;
+                n0               = rt;
+                eps              = rt-n0;
+                eps2             = eps*eps;
+                nnn              = table_nelements*n0;
+            }
 
-                                       default:
-                                               gmx_fatal(FARGS,"Death & horror! No generic coulomb interaction for icoul=%d.\n",icoul);
-                                               break;
-                               }
-                               vctot            = vctot+vcoul;
-                       } /* End of coulomb interactions */
+            /* Coulomb interaction. ielec==0 means no interaction */
+            if(ielec>0)
+            {
+                qq               = iq*charge[jnr];
 
+                switch(ielec)
+                {
+                    case 1:
+                        /* Vanilla cutoff coulomb */
+                        vcoul            = qq*rinv;
+                        fscal            = vcoul*rinvsq;
+                        break;
+
+                    case 2:
+                        /* Reaction-field */
+                        krsq             = fr->k_rf*rsq;
+                        vcoul            = qq*(rinv+krsq-fr->c_rf);
+                        fscal            = qq*(rinv-2.0*krsq)*rinvsq;
+                        break;
+
+                    case 3:
+                        /* Tabulated coulomb */
+                        Y                = VFtab[nnn];
+                        F                = VFtab[nnn+1];
+                        Geps             = eps*VFtab[nnn+2];
+                        Heps2            = eps2*VFtab[nnn+3];
+                        nnn             += 4;
+                        Fp               = F+Geps+Heps2;
+                        VV               = Y+eps*Fp;
+                        FF               = Fp+Geps+2.0*Heps2;
+                        vcoul            = qq*VV;
+                        fscal            = -qq*FF*tabscale*rinv;
+                        break;
+
+                    case 4:
+                        /* GB */
+                        gmx_fatal(FARGS,"Death & horror! GB generic interaction not implemented.\n");
+                        break;
+
+                    default:
+                        gmx_fatal(FARGS,"Death & horror! No generic coulomb interaction for ielec=%d.\n",ielec);
+                        break;
+                }
+                vctot            = vctot+vcoul;
+            } /* End of coulomb interactions */
 
-                       /* VdW interaction. ivdw==0 means no interaction */
-                       if(ivdw>0)
-                       {
-                               tj               = nti+nvdwparam*type[jnr];
+            /* VdW interaction. ivdw==0 means no interaction */
+            if(ivdw>0)
+            {
+                tj               = nti+nvdwparam*type[jnr];
 
-                               switch(ivdw)
-                               {
+                switch(ivdw)
+                {
                                        case 1:
                                                /* Vanilla Lennard-Jones cutoff */
                                                c6               = vdwparam[tj];
index 400866f898e391b931fce0557d756505c8e94546..5349f26e7dfdf159466bbf28244dc0c9834236cb 100644 (file)
 #include "vec.h"
 #include "typedefs.h"
 #include "nb_generic_cg.h"
+#include "nonbonded.h"
+#include "nb_kernel.h"
+#include "nrnb.h"
 
  void
- gmx_nb_generic_cg_kernel(t_nblist *           nlist,
-                          t_forcerec *         fr,
-                          t_mdatoms *          mdatoms,
-                          real *               x,
-                          real *               f,
-                          real *               fshift,
-                          real *               Vc,
-                          real *               Vvdw,
-                          real                 tabscale,  
-                          real *               VFtab,
-                          int *                outeriter,
-                          int *                inneriter)
+gmx_nb_generic_cg_kernel(t_nblist *                nlist,
+                         rvec *                    xx,
+                         rvec *                    ff,
+                         t_forcerec *              fr,
+                         t_mdatoms *               mdatoms,
+                         nb_kernel_data_t *        kernel_data,
+                         t_nrnb *                  nrnb)
 {
-     int           nri,ntype,table_nelements,icoul,ivdw;
+     int           nri,ntype,table_nelements,ielec,ivdw;
      real          facel,gbtabscale;
      int           n,is3,i3,k,nj0,nj1,j3,ggid,nnn,n0;
      int           ai0,ai1,ai,aj0,aj1,aj;
      int *         shift;
      int *         type;
      t_excl *      excl;
-       
-     icoul               = nlist->icoul;
+     real *        fshift;
+     real *        Vc;
+     real *        Vvdw;
+     real          tabscale;
+     real *        VFtab;
+     real *        x;
+     real *        f;
+
+     x                   = xx[0];
+     f                   = ff[0];
+     ielec               = nlist->ielec;
      ivdw                = nlist->ivdw;
-     
+
+     fshift              = fr->fshift[0];
+     Vc                  = kernel_data->energygrp_elec;
+     Vvdw                = kernel_data->energygrp_vdw;
+     tabscale            = kernel_data->table_elec_vdw->scale;
+     VFtab               = kernel_data->table_elec_vdw->data;
+
      /* avoid compiler warnings for cases that cannot happen */
      nnn                 = 0;
      vcoul               = 0.0;
      
      /* 3 VdW parameters for buckingham, otherwise 2 */
      nvdwparam           = (nlist->ivdw==2) ? 3 : 2;
-     table_nelements     = (icoul==3) ? 4 : 0;
+     table_nelements     = (ielec==3) ? 4 : 0;
      table_nelements    += (ivdw==3) ? 8 : 0;
      
      charge              = mdatoms->chargeA;
                      rinvsq           = rinv*rinv;  
                      fscal            = 0;
 
-                     if (icoul==3 || ivdw==3)
+                     if (ielec==3 || ivdw==3)
                      {
                          r                = rsq*rinv;
                          rt               = r*tabscale;     
                          nnn              = table_nelements*n0;                                        
                      }
                      
-                     /* Coulomb interaction. icoul==0 means no interaction */
-                     if (icoul > 0)
+                     /* Coulomb interaction. ielec==0 means no interaction */
+                     if (ielec > 0)
                      {
                          qq               = iq*charge[aj]; 
                          
-                         switch(icoul)
+                         switch(ielec)
                          {
                          case 1:
                              /* Vanilla cutoff coulomb */
                            break;
                            
                          default:
-                             gmx_fatal(FARGS,"Death & horror! No generic coulomb interaction for icoul=%d.\n",icoul);
+                             gmx_fatal(FARGS,"Death & horror! No generic coulomb interaction for ielec=%d.\n",ielec);
                              break;
                          }
                          vctot            = vctot+vcoul;    
          Vc[ggid]        += vctot;
          Vvdw[ggid]      += Vvdwtot;
      }
-     
-     *outeriter       = nlist->nri;            
-     *inneriter       = nlist->jindex[n];              
+    /* Estimate flops, average for generic cg kernel:
+     * 12  flops per outer iteration
+     * 100 flops per inner iteration
+     */
+    inc_nrnb(nrnb,eNR_NBKERNEL_FREE_ENERGY,nlist->nri*12 + nlist->jindex[n]*100);
 }
 
index ac50315688439da5c66d535d9540575f32449693..5e760522d5974a4d51b9eae7b44c5fe65c01c4c6 100644 (file)
 
 #include "types/simple.h"
 #include "typedefs.h"
+#include "nb_kernel.h"
+#include "nrnb.h"
 
 void
-gmx_nb_generic_cg_kernel(t_nblist *           nlist,
-                        t_forcerec *         fr,
-                        t_mdatoms *          mdatoms,
-                        real *               x,
-                        real *               f,
-                        real *               fshift,
-                        real *               Vc,
-                        real *               Vvdw,
-                        real                 tabscale,  
-                        real *               VFtab,
-                        int *                outeriter,
-                        int *                inneriter);
+gmx_nb_generic_cg_kernel(t_nblist *                nlist,
+                         rvec *                    x,
+                         rvec *                    f,
+                         t_forcerec *              fr,
+                         t_mdatoms *               mdatoms,
+                         nb_kernel_data_t *        kernel_data,
+                         t_nrnb *                  nrnb);
 
 #endif
 
diff --git a/src/gmxlib/nonbonded/nb_kernel.c b/src/gmxlib/nonbonded/nb_kernel.c
new file mode 100644 (file)
index 0000000..c210e21
--- /dev/null
@@ -0,0 +1,177 @@
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ * This file is part of GROMACS.
+ * Copyright (c) 2012-
+ *
+ * Written by the Gromacs development team under coordination of
+ * David van der Spoel, Berk Hess, and Erik Lindahl.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org
+ *
+ * And Hey:
+ * Gnomes, ROck Monsters And Chili Sauce
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+
+#include <stdio.h>
+#include <string.h>
+
+#include "nb_kernel.h"
+#include "smalloc.h"
+#include "string2.h"
+
+
+/* Static data structures to find kernels */
+static nb_kernel_info_t *   kernel_list           = NULL;
+static unsigned int         kernel_list_size      = 0;
+static int *                kernel_list_hash      = NULL;
+static unsigned int         kernel_list_hash_size = 0;
+
+static unsigned int
+nb_kernel_hash_func(const char *   arch,
+                    const char *   elec,
+                    const char *   elec_mod,
+                    const char *   vdw,
+                    const char *   vdw_mod,
+                    const char *   geom,
+                    const char *   other,
+                    const char *   vf)
+{
+    unsigned int hash;
+
+    hash = gmx_string_hash_func(arch,gmx_string_hash_init);
+    hash = gmx_string_hash_func(elec,hash);
+    hash = gmx_string_hash_func(elec_mod,hash);
+    hash = gmx_string_hash_func(vdw,hash);
+    hash = gmx_string_hash_func(vdw_mod,hash);
+    hash = gmx_string_hash_func(geom,hash);
+    hash = gmx_string_hash_func(other,hash);
+    hash = gmx_string_hash_func(vf,hash);
+
+    return hash;
+}
+
+void
+nb_kernel_list_add_kernels(nb_kernel_info_t *   new_kernel_list,
+                           int                  new_kernel_list_size)
+{
+    srenew(kernel_list,kernel_list_size+new_kernel_list_size);
+    memcpy(kernel_list+kernel_list_size,new_kernel_list,new_kernel_list_size*sizeof(nb_kernel_info_t));
+    kernel_list_size += new_kernel_list_size;
+}
+
+
+int
+nb_kernel_list_hash_init(void)
+{
+    unsigned int            i;
+    unsigned int            index;
+
+    kernel_list_hash_size   = kernel_list_size*5;
+    snew(kernel_list_hash,kernel_list_hash_size);
+
+    for(i=0;i<kernel_list_hash_size;i++)
+    {
+        kernel_list_hash[i] = -1;
+    }
+    for(i=0;i<kernel_list_size;i++)
+    {
+        index=nb_kernel_hash_func(kernel_list[i].architecture,
+                                  kernel_list[i].electrostatics,
+                                  kernel_list[i].electrostatics_modifier,
+                                  kernel_list[i].vdw,
+                                  kernel_list[i].vdw_modifier,
+                                  kernel_list[i].geometry,
+                                  kernel_list[i].other,
+                                  kernel_list[i].vf) % kernel_list_hash_size;
+
+        /* Check for collisions and advance if necessary */
+        while( kernel_list_hash[index] != -1 )
+        {
+            index = (index+1) % kernel_list_hash_size;
+        }
+
+        kernel_list_hash[index] = i;
+    }
+    return 0;
+}
+
+void
+nb_kernel_list_hash_destroy()
+{
+    sfree(kernel_list_hash);
+    kernel_list_hash = NULL;
+    kernel_list_hash_size = 0;
+}
+
+
+nb_kernel_t *
+nb_kernel_list_findkernel(FILE *              log,
+                          const char *        arch,
+                          const char *        electrostatics,
+                          const char *        electrostatics_modifier,
+                          const char *        vdw,
+                          const char *        vdw_modifier,
+                          const char *        geometry,
+                          const char *        other,
+                          const char *        vf)
+{
+    int                 i;
+    unsigned int        index;
+    nb_kernel_info_t *  kernelinfo_ptr;
+
+    if(kernel_list_hash_size==0)
+    {
+        return NULL;
+    }
+
+    index=nb_kernel_hash_func(arch,
+                              electrostatics,
+                              electrostatics_modifier,
+                              vdw,
+                              vdw_modifier,
+                              geometry,
+                              other,
+                              vf) % kernel_list_hash_size;
+
+    kernelinfo_ptr = NULL;
+    while( (i=kernel_list_hash[index]) != -1)
+    {
+        if(!gmx_strcasecmp_min(kernel_list[i].architecture,arch) &&
+           !gmx_strcasecmp_min(kernel_list[i].electrostatics,electrostatics) &&
+           !gmx_strcasecmp_min(kernel_list[i].electrostatics_modifier,electrostatics_modifier) &&
+           !gmx_strcasecmp_min(kernel_list[i].vdw,vdw) &&
+           !gmx_strcasecmp_min(kernel_list[i].vdw_modifier,vdw_modifier) &&
+           !gmx_strcasecmp_min(kernel_list[i].geometry,geometry) &&
+           !gmx_strcasecmp_min(kernel_list[i].other,other) &&
+           !gmx_strcasecmp_min(kernel_list[i].vf,vf))
+        {
+            kernelinfo_ptr = kernel_list+i;
+            break;
+        }
+        index = (index+1) % kernel_list_hash_size;
+    }
+
+    if(log && kernelinfo_ptr!=NULL)
+    {
+        fprintf(log,
+                "NB kernel %s() with architecture '%s' used for neighborlist with\n"
+                "    Elec: '%s', Modifier: '%s'\n"
+                "    Vdw:  '%s', Modifier: '%s'\n"
+                "    Geom: '%s', Other: '%s', Calc: '%s'\n\n",
+                kernelinfo_ptr->kernelname,arch,electrostatics,electrostatics_modifier,
+                vdw,vdw_modifier,geometry,other,vf);
+    }
+
+    /* If we did not find any kernel the pointer will still be NULL */
+    return (kernelinfo_ptr !=NULL) ? kernelinfo_ptr->kernelptr : NULL;
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel.h b/src/gmxlib/nonbonded/nb_kernel.h
new file mode 100644 (file)
index 0000000..2830ec6
--- /dev/null
@@ -0,0 +1,159 @@
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ * This file is part of GROMACS.
+ * Copyright (c) 2012-
+ *
+ * Written by the Gromacs development team under coordination of
+ * David van der Spoel, Berk Hess, and Erik Lindahl.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org
+ *
+ * And Hey:
+ * Gnomes, ROck Monsters And Chili Sauce
+ */
+#ifndef _nb_kernel_h_
+#define _nb_kernel_h_
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if 0
+} /* fixes auto-indentation problems */
+#endif
+
+
+#include "types/simple.h"
+#include "typedefs.h"
+
+/* Structure to collect kernel data not available in forcerec or mdatoms structures.
+ * This is only used inside the nonbonded module.
+ */
+typedef struct
+{
+    int                flags;
+    t_blocka *         exclusions;
+    real *             lambda;
+    real *             dvdl;
+
+    /* pointers to tables */
+    t_forcetable *     table_elec;
+    t_forcetable *     table_vdw;
+    t_forcetable *     table_elec_vdw;
+
+    /* potentials */
+    real *             energygrp_elec;
+    real *             energygrp_vdw;
+    real *             energygrp_polarization;
+}
+nb_kernel_data_t;
+
+
+typedef void
+nb_kernel_t(t_nblist *                nlist,
+            rvec *                    x,
+            rvec *                    f,
+            t_forcerec *              fr,
+            t_mdatoms *               mdatoms,
+            nb_kernel_data_t *        kernel_data,
+            t_nrnb *                  nrnb);
+
+
+/* Structure with a kernel pointer and settings. This cannot be abstract
+ * since we define the kernel list statically for each architecture in a header,
+ * and use it to set up the kernel hash functions to find kernels.
+ *
+ * The electrostatics/vdw names should be obvious and correspond to the
+ * forms of the interactions calculated in this function, and the interaction
+ * modifiers describe switch/shift and similar alterations. Geometry refers
+ * to whether this kernel calculates interactions between single particles or
+ * waters (groups of 3/4 atoms) for better performance. Finally, the VF string
+ * selects whether the kernel calculates only potential, only force, or both
+ *
+ * The allowed values for kernel interactions are described by the
+ * enumerated types gmx_nbkernel_elec and gmx_nbkernel_vdw (see types/enums.h).
+ * Note that these are deliberately NOT identical to the interactions the
+ * user can set, since some user-specified interactions will be tabulated, and
+ * Lennard-Jones and Buckingham use different kernels while their setting in
+ * the input is decided by nonbonded parameter formats rather than mdp options.
+ *
+ * The interaction modifiers are described by the eintmod enum type, while
+ * the kernel geometry is decided from the neighborlist geometry, which is
+ * described by the enum gmx_nblist_kernel_geometry (again, see types/enums.h).
+ * The 
+ *
+ * Note that any particular implementation of kernels might not support all of 
+ * these strings. In fact, some might not be supported by any architecture yet.
+ * The whole point of using strings and hashes is that we do not have to define a 
+ * unique set of strings in a single place. Thus, as long as you implement a
+ * corresponding kernel, you could in theory provide any string you want.
+ */
+typedef struct nb_kernel_info
+{
+    nb_kernel_t *   kernelptr;
+    const char *    kernelname;
+    const char *    architecture;     /* e.g. "C", "SSE", "BlueGene", etc. */
+
+    const char *    electrostatics;
+    const char *    electrostatics_modifier;
+    const char *    vdw;
+    const char *    vdw_modifier;
+    const char *    geometry;
+    const char *    other;  /* Any extra info you want/need to select a kernel */
+    const char *    vf;     /* "PotentialAndForce", "Potential", or "Force" */
+}
+nb_kernel_info_t;
+
+
+void
+nb_kernel_list_add_kernels(nb_kernel_info_t *   new_kernelinfo,
+                           int                  new_size);
+
+int
+nb_kernel_list_hash_init(void);
+
+/* Return a function pointer to the nonbonded kernel pointer with
+ * settings according to the text strings provided. GROMACS does not guarantee
+ * the existence of accelerated kernels for any combination, so the return value
+ * can be NULL.
+ * In that case, you can try a different/lower-level acceleration, and
+ * eventually you need to prepare to fall back to generic kernels or change
+ * your settings and try again.
+ *
+ * The names of the text strings are obviously meant to reflect settings in
+ * GROMACS, but inside this routine they are merely used as a set of text
+ * strings not defined here. The routine will simply compare the arguments with
+ * the contents of the corresponding strings in the nb_kernel_list_t structure.
+ *
+ * This function does not check whether the kernel in question can run on the
+ * present architecture since that would require a slow cpuid call for every
+ * single invocation.
+ */
+nb_kernel_t *
+nb_kernel_list_findkernel(FILE *              log,
+                          const char *        architecture,
+                          const char *        electrostatics,
+                          const char *        electrostatics_modifier,
+                          const char *        vdw,
+                          const char *        vdw_modifier,
+                          const char *        geometry,
+                          const char *        other,
+                          const char *        vf);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _nb_kernel_h_ */
index c18cad56d66bf1b6e34be4846c38ac024a643cf9..c5aa611c83579f057d350b6454070026871c0b48 100644 (file)
@@ -36,7 +36,7 @@
 
 #include "types/nrnb.h"
 #include "nb_kernel_c_adress.h"
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 
 
 /* Include standard kernel headers in local directory */
index a8956bb4110eee2818d34cccac364c9173928e6a..29013393ced511ae790b59b9919925ff9868daa5 100644 (file)
@@ -38,7 +38,7 @@
 
 #include <stdio.h>
 
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 
 void
 nb_kernel_setup_adress(FILE *fplog,nb_adress_kernel_t **list);
index a5290c7efd49806c1d16d5bf506ba5d9e27da997..053aac3c23fee69bf38f2e6dab07ac8185d12bd9 100644 (file)
 
 
 
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 #include "nb_kernel_bluegene.h"
 
 
index 4e3800bcf4d717a1218b502be62b0d4e9738b2c1..bcf14d6201a9fd4a9b6c71840a6fa8d1afbd00c5 100644 (file)
@@ -28,7 +28,7 @@
 
 #include <types/simple.h>
 
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 
 void
 nb_kernel_setup_bluegene(FILE *log,nb_kernel_t **list);
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py b/src/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py
new file mode 100755 (executable)
index 0000000..d491308
--- /dev/null
@@ -0,0 +1,489 @@
+#!/usr/bin/python
+
+import sys
+import os
+sys.path.append ( "../preprocessor" )
+from gmxpreprocess import gmxpreprocess
+
+# "The happiest programs are programs that write other programs."
+#
+#
+# This script controls the generation of Gromacs nonbonded kernels.
+#
+# We no longer generate kernels on-the-fly, so this file is not run
+# during a Gromacs compile - only when we need to update the kernels (=rarely).
+#
+# To maximize performance, each combination of interactions in Gromacs
+# has a separate nonbonded kernel without conditionals in the code.
+# To avoid writing hundreds of different routines for each architecture,
+# we instead use a custom preprocessor so we can encode the conditionals
+# and expand for-loops (e.g, for water-water interactions)
+# from a general kernel template. While that file will contain quite a
+# few preprocessor directives, it is still an order of magnitude easier
+# to maintain than ~200 different kernels (not to mention it avoids bugs).
+#
+# To actually generate the kernels, this program iteratively calls the
+# preprocessor with different define settings corresponding to all
+# combinations of coulomb/van-der-Waals/geometry options.
+#
+# A main goal in the design was to make this new generator _general_. For
+# this reason we have used a lot of different fields to identify a particular
+# kernel and interaction. Basically, each kernel will have a name like
+#
+# nbkernel_ElecXX_VdwYY_GeomZZ_VF_QQ()
+#
+# Where XX/YY/ZZ/VF are strings to identify what the kernel computes.
+#
+# Elec/Vdw describe the type of interaction for electrostatics and van der Waals.
+# The geometry settings correspond e.g. to water-water or water-particle kernels,
+# and finally the VF setting is V,F,or VF depending on whether we calculate
+# only the potential, only the force, or both of them. The final string (QQ)
+# is the architecture/language/optimization of the kernel.
+#
+Arch       = 'c'
+
+# Explanation of the 'properties':
+#
+# It is cheap to compute r^2, and the kernels require various other functions of r for
+# different kinds of interaction. Depending on the needs of the kernel and the available
+# processor instructions, this will be done in different ways.
+#
+# 'rinv' means we need 1/r, which is calculated as 1/sqrt(r^2).
+# 'rinvsq' means we need 1/(r*r). This is calculated as rinv*rinv if we already did rinv, otherwise 1/r^2.
+# 'r' is similarly calculated as r^2*rinv when needed
+# 'table' means the interaction is tabulated, in which case we will calculate a table index before the interaction
+# 'shift' means the interaction will be modified by a constant to make it zero at the cutoff.
+# 'cutoff' means the interaction is set to 0.0 outside the cutoff
+#
+
+FileHeader = \
+'/*\n' \
+' * Note: this file was generated by the Gromacs '+Arch+' kernel generator.\n' \
+' *\n' \
+' *                This source code is part of\n' \
+' *\n' \
+' *                 G   R   O   M   A   C   S\n' \
+' *\n' \
+' * Copyright (c) 2001-2012, The GROMACS Development Team\n' \
+' *\n' \
+' * Gromacs is a library for molecular simulation and trajectory analysis,\n' \
+' * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for\n' \
+' * a full list of developers and information, check out http://www.gromacs.org\n' \
+' *\n' \
+' * This program is free software; you can redistribute it and/or modify it under\n' \
+' * the terms of the GNU Lesser General Public License as published by the Free\n' \
+' * Software Foundation; either version 2 of the License, or (at your option) any\n' \
+' * later version.\n' \
+' *\n' \
+' * To help fund GROMACS development, we humbly ask that you cite\n' \
+' * the papers people have written on it - you can find them on the website.\n' \
+' */\n'
+
+###############################################
+# ELECTROSTATICS
+# Interactions and flags for them
+###############################################
+ElectrostaticsList = {
+    'None'                    : [],
+    'Coulomb'                 : ['rinv','rinvsq'],
+    'ReactionField'           : ['rinv','rinvsq'],
+    'GeneralizedBorn'         : ['rinv','r'],
+    'CubicSplineTable'        : ['rinv','r','table'],
+    'Ewald'                   : ['rinv','rinvsq','r'],
+}
+
+
+###############################################
+# VAN DER WAALS
+# Interactions and flags for them
+###############################################
+VdwList = {
+    'None'                    : [],
+    'LennardJones'            : ['rinvsq'],
+    'Buckingham'              : ['rinv','rinvsq','r'],
+    'CubicSplineTable'        : ['rinv','r','table'],
+}
+
+
+###############################################
+# MODIFIERS
+# Different ways to adjust/modify interactions to conserve energy
+###############################################
+ModifierList = {
+    'None'                    : [],
+    'ExactCutoff'             : ['exactcutoff'],        # Zero the interaction outside the cutoff, used for reaction-field-zero
+    'PotentialShift'          : ['shift','exactcutoff'],
+    'PotentialSwitch'         : ['r','switch','exactcutoff']
+}
+
+
+###############################################
+# GEOMETRY COMBINATIONS
+###############################################
+GeometryNameList = [
+    [ 'Particle' , 'Particle' ],
+    [ 'Water3'   , 'Particle' ],
+    [ 'Water3'   , 'Water3'   ],     
+    [ 'Water4'   , 'Particle' ],
+    [ 'Water4'   , 'Water4'   ]
+]
+
+
+###############################################
+# POTENTIAL / FORCE
+###############################################
+VFList = [
+    'PotentialAndForce',
+# 'Potential',   # Not used yet
+    'Force'
+]
+
+
+###############################################
+# GEOMETRY PROPERTIES
+###############################################
+# Dictionaries with lists telling which interactions are present
+# 1,2,3 means particles 1,2,3 (but not 0) have electrostatics!
+GeometryElectrostatics = {
+    'Particle'  : [ 0 ],
+    'Particle2' : [ 0 , 1 ],
+    'Particle3' : [ 0 , 1 , 2 ],
+    'Particle4' : [ 0 , 1 , 2 , 3 ],
+    'Water3'    : [ 0 , 1 , 2 ],
+    'Water4'    : [ 1 , 2 , 3 ]
+}
+
+GeometryVdw = {
+    'Particle'  : [ 0 ],
+    'Particle2' : [ 0 , 1 ],
+    'Particle3' : [ 0 , 1 , 2 ],
+    'Particle4' : [ 0 , 1 , 2 , 3 ],
+    'Water3'    : [ 0 ],
+    'Water4'    : [ 0 ]
+}
+
+
+
+
+# Dictionary to abbreviate all strings (mixed from all the lists)
+Abbreviation = {
+    'None'                    : 'None',
+    'Coulomb'                 : 'Coul',
+    'Ewald'                   : 'Ew',
+    'ReactionField'           : 'RF',
+    'GeneralizedBorn'         : 'GB',
+    'CubicSplineTable'        : 'CSTab',
+    'LennardJones'            : 'LJ',
+    'Buckingham'              : 'Bham',
+    'PotentialShift'          : 'Sh',
+    'PotentialSwitch'         : 'Sw',
+    'ExactCutoff'             : 'Cut',
+    'PotentialAndForce'       : 'VF',
+    'Potential'               : 'V',
+    'Force'                   : 'F',
+    'Water3'                  : 'W3',
+    'Water4'                  : 'W4',
+    'Particle'                : 'P1',
+    'Particle2'               : 'P2',
+    'Particle3'               : 'P3',
+    'Particle4'               : 'P4'
+}
+
+
+###############################################
+# Functions
+###############################################
+
+# Return a string with the kernel name from current settings
+def MakeKernelFileName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom):
+    ElecStr = 'Elec' + Abbreviation[KernelElec]
+    if(KernelElecMod!='None'):
+        ElecStr = ElecStr + Abbreviation[KernelElecMod]
+    VdwStr  = 'Vdw'  + Abbreviation[KernelVdw]
+    if(KernelVdwMod!='None'):
+        VdwStr = VdwStr + Abbreviation[KernelVdwMod]
+    GeomStr = 'Geom' + Abbreviation[KernelGeom[0]] + Abbreviation[KernelGeom[1]]
+    return 'nb_kernel_' + ElecStr + '_' + VdwStr + '_' + GeomStr + '_' + Arch
+
+def MakeKernelName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF):
+    ElecStr = 'Elec' + Abbreviation[KernelElec]
+    if(KernelElecMod!='None'):
+        ElecStr = ElecStr + Abbreviation[KernelElecMod]
+    VdwStr  = 'Vdw'  + Abbreviation[KernelVdw]
+    if(KernelVdwMod!='None'):
+        VdwStr = VdwStr + Abbreviation[KernelVdwMod]
+    GeomStr = 'Geom' + Abbreviation[KernelGeom[0]] + Abbreviation[KernelGeom[1]]
+    VFStr   = Abbreviation[KernelVF]
+    return 'nb_kernel_' + ElecStr + '_' + VdwStr + '_' + GeomStr + '_' + VFStr + '_' + Arch
+
+# Return a string with a declaration to use for the kernel;
+# this will be a sequence of string combinations as well as the actual function name
+# Dont worry about field widths - that is just pretty-printing for the header!
+def MakeKernelDecl(KernelName,KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelOther,KernelVF):
+    KernelStr   = '\"'+KernelName+'\"'
+    ArchStr     = '\"'+Arch+'\"'
+    ElecStr     = '\"'+KernelElec+'\"'
+    ElecModStr  = '\"'+KernelElecMod+'\"'
+    VdwStr      = '\"'+KernelVdw+'\"'
+    VdwModStr   = '\"'+KernelVdwMod+'\"'
+    GeomStr     = '\"'+KernelGeom[0]+KernelGeom[1]+'\"'
+    OtherStr    = '\"'+KernelOther+'\"'
+    VFStr       = '\"'+KernelVF+'\"'
+
+    ThisSpec = ArchStr+', '+ElecStr+', '+ElecModStr+', '+VdwStr+', '+VdwModStr+', '+GeomStr+', '+OtherStr+', '+VFStr
+    ThisDecl = '    { '+KernelName+', '+KernelStr+', '+ThisSpec+' }'
+    return ThisDecl
+
+
+# Returns 1 if this kernel should be created, 0 if we should skip it
+# This routine is not critical - it is not the end of the world if we create more kernels,
+# but since the number is pretty large we save both space and compile-time by reducing it a bit.
+def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF):
+
+    # No need for kernels without interactions
+    if(KernelElec=='None' and KernelVdw=='None'):
+        return 0
+
+    # No need for modifiers without interactions
+    if((KernelElec=='None' and KernelElecMod!='None') or (KernelVdw=='None' and KernelVdwMod!='None')):
+        return 0
+
+    # No need for LJ-only water optimization, or water optimization with implicit solvent.
+    if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)):
+        return 0
+
+    # Non-matching table settings are pointless
+    if( ('Table' in KernelElec) and ('Table' in KernelVdw) and KernelElec!=KernelVdw ):
+        return 0
+
+    # Try to reduce the number of different switch/shift options to get a reasonable number of kernels
+    # For electrostatics, reaction-field can use 'exactcutoff', and ewald can use switch or shift.
+    if(KernelElecMod=='ExactCutoff' and KernelElec!='ReactionField'):
+        return 0
+    if(KernelElecMod in ['PotentialShift','PotentialSwitch'] and KernelElec!='Ewald'):
+        return 0
+    # For Vdw, we support switch and shift for Lennard-Jones/Buckingham
+    if((KernelVdwMod=='ExactCutoff') or
+       (KernelVdwMod in ['PotentialShift','PotentialSwitch'] and KernelVdw not in ['LennardJones','Buckingham'])):
+        return 0
+
+    # Choose either switch or shift and don't mix them...
+    if((KernelElecMod=='PotentialShift' and KernelVdwMod=='PotentialSwitch') or
+       (KernelElecMod=='PotentialSwitch' and KernelVdwMod=='PotentialShift')):
+        return 0
+
+    # Don't use a Vdw kernel with a modifier if the electrostatics one does not have one
+    if(KernelElec!='None' and KernelElecMod=='None' and KernelVdwMod!='None'):
+        return 0
+
+    # Don't use an electrostatics kernel with a modifier if the vdw one does not have one,
+    # unless the electrostatics one is reaction-field with exact cutoff.
+    if(KernelVdw!='None' and KernelVdwMod=='None' and KernelElecMod!='None'):
+        if(KernelElec=='ReactionField' and KernelVdw!='CubicSplineTable'):
+            return 0
+        elif(KernelElec!='ReactionField'):
+            return 0
+
+    return 1
+
+
+
+#
+# The preprocessor will automatically expand the interactions for water and other
+# geometries inside the kernel, but to get this right we need to setup a couple
+# of defines - we do them in a separate routine to keep the main loop clean.
+#
+# While this routine might look a bit complex it is actually quite straightforward,
+# and the best news is that you wont have to modify _anything_ for a new geometry
+# as long as you correctly define its Electrostatics/Vdw geometry in the lists above!
+#
+def SetDefines(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF,defines):
+    # What is the _name_ for the i/j group geometry?
+    igeometry            = KernelGeom[0]
+    jgeometry            = KernelGeom[1]
+    # define so we can access it in the source when the preprocessor runs
+    defines['GEOMETRY_I'] = igeometry
+    defines['GEOMETRY_J'] = jgeometry
+
+    # For the i/j groups, extract a python list of which sites have electrostatics
+    # For SPC/TIP3p this will be [1,1,1], while TIP4p (no elec on first site) will be [0,1,1,1]
+    ielec                = GeometryElectrostatics[igeometry]
+    jelec                = GeometryElectrostatics[jgeometry]
+    # Zero out the corresponding lists in case we dont do Elec
+    if(KernelElec=='None'):
+        ielec = []
+        jelec = []
+
+    # Extract similar interaction lists for Vdw interactions (example for SPC: [1,0,0])
+    iVdw                 = GeometryVdw[igeometry]
+    jVdw                 = GeometryVdw[jgeometry]
+
+    # Zero out the corresponding lists in case we dont do Vdw
+    if(KernelVdw=='None'):
+        iVdw = []
+        jVdw = []
+
+    # iany[] and jany[] contains lists of the particles actually used (for interactions) in this kernel
+    iany = list(set(ielec+iVdw))  # convert to+from set to make elements unique
+    jany = list(set(jelec+jVdw))
+
+    defines['PARTICLES_ELEC_I'] = ielec
+    defines['PARTICLES_ELEC_J'] = jelec
+    defines['PARTICLES_VDW_I']  = iVdw
+    defines['PARTICLES_VDW_J']  = jVdw
+    defines['PARTICLES_I']      = iany
+    defines['PARTICLES_J']      = jany
+
+    # elecij,Vdwij are sets with pairs of particles for which the corresponding interaction is done
+    # (and anyij again corresponds to either electrostatics or Vdw)
+    elecij = []
+    Vdwij  = []
+    anyij  = []
+
+    for i in ielec:
+        for j in jelec:
+            elecij.append([i,j])
+
+    for i in iVdw:
+        for j in jVdw:
+            Vdwij.append([i,j])
+
+    for i in iany:
+        for j in jany:
+            if [i,j] in elecij or [i,j] in Vdwij:
+                anyij.append([i,j])
+
+    defines['PAIRS_IJ']     = anyij
+
+    # Make an 2d list-of-distance-properties-to-calculate for i,j
+    ni = max(iany)+1
+    nj = max(jany)+1
+    # Each element properties[i][j] is an empty list
+    properties = [ [ [] for j in range(0,nj) ] for i in range (0,ni) ]
+    # Add properties to each set
+    for i in range(0,ni):
+        for j in range(0,nj):
+            if [i,j] in elecij:
+                properties[i][j] = properties[i][j] + ['electrostatics'] + ElectrostaticsList[KernelElec] + ModifierList[KernelElecMod]
+            if [i,j] in Vdwij:
+                properties[i][j] = properties[i][j] + ['vdw'] + VdwList[KernelVdw] + ModifierList[KernelVdwMod]
+            # Add rinv if we need r
+            if 'r' in properties[i][j]:
+                properties[i][j] = properties[i][j] + ['rinv']
+            # Add rsq if we need rinv or rinsq
+            if 'rinv' in properties[i][j] or 'rinvsq' in properties[i][j]:
+                properties[i][j] = properties[i][j] + ['rsq']
+
+    defines['INTERACTION_FLAGS']    = properties
+
+
+
+def PrintStatistics(ratio):
+    ratio = 100.0*ratio
+    print '\rGenerating %s nonbonded kernels... %5.1f%%' % (Arch,ratio),
+    sys.stdout.flush()
+
+
+
+defines = {}
+kerneldecl = []
+
+cnt     = 0.0
+nelec   = len(ElectrostaticsList)
+nVdw    = len(VdwList)
+nmod    = len(ModifierList)
+ngeom   = len(GeometryNameList)
+
+ntot    = nelec*nmod*nVdw*nmod*ngeom
+
+numKernels = 0
+
+fpdecl = open('nb_kernel_' + Arch + '.c','w')
+fpdecl.write( FileHeader )
+fpdecl.write( '#ifndef nb_kernel_' + Arch + '_h\n' )
+fpdecl.write( '#define nb_kernel_' + Arch + '_h\n\n' )
+fpdecl.write( '#include "../nb_kernel.h"\n\n' )
+
+for KernelElec in ElectrostaticsList:
+    defines['KERNEL_ELEC'] = KernelElec
+
+    for KernelElecMod in ModifierList:
+        defines['KERNEL_MOD_ELEC'] = KernelElecMod
+
+        for KernelVdw in VdwList:
+            defines['KERNEL_VDW'] = KernelVdw
+
+            for KernelVdwMod in ModifierList:
+                defines['KERNEL_MOD_VDW'] = KernelVdwMod
+
+                for KernelGeom in GeometryNameList:
+
+                    cnt += 1
+                    KernelFilename = MakeKernelFileName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom) + '.c'
+                    fpkernel = open(KernelFilename,'w')
+                    defines['INCLUDE_HEADER'] = 1  # Include header first time in new file
+                    DoHeader = 1
+
+                    for KernelVF in VFList:
+
+                        KernelName = MakeKernelName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF)
+
+                        defines['KERNEL_NAME'] = KernelName
+                        defines['KERNEL_VF']   = KernelVF
+
+                        # Check if this is a valid/sane/usable combination
+                        if not KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF):
+                            continue;
+
+                        # The overall kernel settings determine what the _kernel_ calculates, but for the water
+                        # kernels this does not mean that every pairwise interaction has e.g. Vdw interactions.
+                        # This routine sets defines of what to calculate for each pair of particles in those cases.
+                        SetDefines(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF,defines)
+
+                        if(DoHeader==1):
+                            fpkernel.write( FileHeader )
+
+                        gmxpreprocess('nb_kernel_template_' + Arch + '.pre', KernelName+'.tmp' , defines, force=1,contentType='C')
+                        numKernels = numKernels + 1
+
+                        defines['INCLUDE_HEADER'] = 0   # Header has been included once now
+                        DoHeader=0
+
+                        # Append temp file contents to the common kernelfile
+                        fptmp = open(KernelName+'.tmp','r')
+                        fpkernel.writelines(fptmp.readlines())
+                        fptmp.close()
+                        os.remove(KernelName+'.tmp')
+
+                        # Add a declaration for this kernel
+                        fpdecl.write('nb_kernel_t ' + KernelName + ';\n');
+
+                        # Add declaration to the buffer
+                        KernelOther=''
+                        kerneldecl.append(MakeKernelDecl(KernelName,KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelOther,KernelVF))
+
+                    filesize = fpkernel.tell()
+                    fpkernel.close()
+                    if(filesize==0):
+                        os.remove(KernelFilename)
+
+                    PrintStatistics(cnt/ntot)
+                pass
+            pass
+        pass
+    pass
+pass
+
+# Write out the list of settings and corresponding kernels to the declaration file
+fpdecl.write( '\n\n' )
+fpdecl.write( 'nb_kernel_info_t\n' )
+fpdecl.write( 'kernellist_'+Arch+'[] =\n' )
+fpdecl.write( '{\n' )
+for decl in kerneldecl[0:-1]:
+    fpdecl.write( decl + ',\n' )
+fpdecl.write( kerneldecl[-1] + '\n' )
+fpdecl.write( '};\n\n' )
+fpdecl.write( 'int\n' )
+fpdecl.write( 'kernellist_'+Arch+'_size = sizeof(kernellist_'+Arch+')/sizeof(kernellist_'+Arch+'[0]);\n\n')
+fpdecl.write( '#endif\n')
+fpdecl.close()
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel010.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel010.c
deleted file mode 100644 (file)
index 15232b9..0000000
+++ /dev/null
@@ -1,428 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel010.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel010
- * Coulomb interaction:     Not calculated
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel010(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 33 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel010nf
- * Coulomb interaction:     Not calculated
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel010nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Inner loop uses 19 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 4 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel010.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel010.h
deleted file mode 100644 (file)
index 0793977..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL010_H_
-#define _NBKERNEL010_H_
-
-/*! \file  nb_kernel010.h
- *  \brief Nonbonded kernel 010 (LJ)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 010 with forces.
- *
- *  <b>Coulomb interaction:</b> No <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel010
-                (int *         nri,        int *         iinr,     
-                 int *         jindex,     int *         jjnr,   
-                 int *         shift,      real *        shiftvec,
-                 real *        fshift,     int *         gid, 
-                 real *        pos,        real *        faction,
-                 real *        charge,     real *        facel,
-                 real *        krf,        real *        crf,  
-                 real *        Vc,         int *         type,   
-                 int *         ntype,      real *        vdwparam,
-                 real *        Vvdw,       real *        tabscale,
-                 real *        VFtab,      real *        invsqrta, 
-                 real *        dvda,       real *        gbtabscale,
-                 real *        GBtab,      int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real *        work);
-
-
-/*! \brief Nonbonded kernel 010 without forces.
- *
- *  <b>Coulomb interaction:</b> No <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel010nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real *        work);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL010_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel020.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel020.c
deleted file mode 100644 (file)
index c5fbdbc..0000000
+++ /dev/null
@@ -1,433 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel020.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel020
- * Coulomb interaction:     Not calculated
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel020(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 61 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel020nf
- * Coulomb interaction:     Not calculated
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel020nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Inner loop uses 48 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 4 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel020.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel020.h
deleted file mode 100644 (file)
index 3b1c305..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL020_H_
-#define _NBKERNEL020_H_
-
-/*! \file  nb_kernel020.h
- *  \brief Nonbonded kernel 020 (Bham)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 020 with forces.
- *
- *  <b>Coulomb interaction:</b> No <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel020
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 020 without forces.
- *
- *  <b>Coulomb interaction:</b> No <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel020nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL020_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel030.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel030.c
deleted file mode 100644 (file)
index b3a0154..0000000
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel030.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel030
- * Coulomb interaction:     Not calculated
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel030(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 54 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel030nf
- * Coulomb interaction:     Not calculated
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel030nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Inner loop uses 33 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 4 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel030.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel030.h
deleted file mode 100644 (file)
index d658d88..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL030_H_
-#define _NBKERNEL030_H_
-
-/*! \file  nb_kernel030.h
- *  \brief Nonbonded kernel 030 (Tab VdW)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 030 with forces.
- *
- *  <b>Coulomb interaction:</b> No <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel030
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 030 without forces.
- *
- *  <b>Coulomb interaction:</b> No <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel030nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL030_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel100.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel100.c
deleted file mode 100644 (file)
index 746b21f..0000000
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel100.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel100
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel100(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 27 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel100nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel100nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          iq;
-    real          qq,vcoul,vctot;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 16 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 5 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel100.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel100.h
deleted file mode 100644 (file)
index 56e6415..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL100_H_
-#define _NBKERNEL100_H_
-
-/*! \file  nb_kernel100.h
- *  \brief Nonbonded kernel 100 (Coul)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 100 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel100
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 100 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel100nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL100_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel101.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel101.c
deleted file mode 100644 (file)
index 9853826..0000000
+++ /dev/null
@@ -1,533 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel101.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel101
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel101(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 80 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel101nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel101nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 47 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel101.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel101.h
deleted file mode 100644 (file)
index 93bd41a..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL101_H_
-#define _NBKERNEL101_H_
-
-/*! \file  nb_kernel101.h
- *  \brief Nonbonded kernel 101 (Coul, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 101 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel101
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 101 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel101nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL101_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel102.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel102.c
deleted file mode 100644 (file)
index f26ad66..0000000
+++ /dev/null
@@ -1,811 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel102.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel102
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel102(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv12*rinv12;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv12;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv13*rinv13;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv13;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 234 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel102nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel102nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv12;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv13;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 135 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel102.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel102.h
deleted file mode 100644 (file)
index 720b3b0..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL102_H_
-#define _NBKERNEL102_H_
-
-/*! \file  nb_kernel102.h
- *  \brief Nonbonded kernel 102 (Coul, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 102 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel102
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 102 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel102nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL102_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel103.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel103.c
deleted file mode 100644 (file)
index 24932d0..0000000
+++ /dev/null
@@ -1,533 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel103.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel103
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel103(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-                rinvsq           = rinv41*rinv41;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv41;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 80 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel103nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel103nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv41;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 47 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel103.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel103.h
deleted file mode 100644 (file)
index 23ffce0..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL103_H_
-#define _NBKERNEL103_H_
-
-/*! \file  nb_kernel103.h
- *  \brief Nonbonded kernel 103 (Coul, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 103 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel103
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 103 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel103nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL103_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel104.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel104.c
deleted file mode 100644 (file)
index 6b1c528..0000000
+++ /dev/null
@@ -1,811 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel104.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel104
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel104(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv24*rinv24;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv24;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv34*rinv34;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv34;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv42*rinv42;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv42;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv43*rinv43;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv43;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-                rinvsq           = rinv44*rinv44;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv44;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 234 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel104nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel104nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv24;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv34;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv42;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv43;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv44;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 135 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel104.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel104.h
deleted file mode 100644 (file)
index d7d2421..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL104_H_
-#define _NBKERNEL104_H_
-
-/*! \file  nb_kernel104.h
- *  \brief Nonbonded kernel 104 (Coul, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 104 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel104
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 104 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel104nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL104_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel110.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel110.c
deleted file mode 100644 (file)
index 6cada4e..0000000
+++ /dev/null
@@ -1,449 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel110.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel110
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel110(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (vcoul+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 38 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel110nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel110nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Inner loop uses 24 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel110.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel110.h
deleted file mode 100644 (file)
index b56751d..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL110_H_
-#define _NBKERNEL110_H_
-
-/*! \file  nb_kernel110.h
- *  \brief Nonbonded kernel 110 (Coul + LJ)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 110 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel110
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 110 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel110nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL110_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel111.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel111.c
deleted file mode 100644 (file)
index d8df08c..0000000
+++ /dev/null
@@ -1,571 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel111.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel111
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel111(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (vcoul+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 91 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel111nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel111nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 55 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel111.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel111.h
deleted file mode 100644 (file)
index dbf916a..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL111_H_
-#define _NBKERNEL111_H_
-
-/*! \file  nb_kernel111.h
- *  \brief Nonbonded kernel 111 (Coul + LJ, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 111 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel111
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 111 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel111nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL111_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel112.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel112.c
deleted file mode 100644 (file)
index 6948a9b..0000000
+++ /dev/null
@@ -1,845 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel112.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel112
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel112(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (vcoul+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv12*rinv12;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv12;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv13*rinv13;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv13;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 245 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel112nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel112nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv12;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv13;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 143 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel112.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel112.h
deleted file mode 100644 (file)
index c67acbd..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL112_H_
-#define _NBKERNEL112_H_
-
-/*! \file  nb_kernel112.h
- *  \brief Nonbonded kernel 112 (Coul + LJ, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 112 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel112
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 112 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel112nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL112_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel113.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel113.c
deleted file mode 100644 (file)
index ce29bbe..0000000
+++ /dev/null
@@ -1,616 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel113.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel113
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel113(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-                rinvsq           = rinv41*rinv41;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv41;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 113 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel113nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel113nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv41;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 66 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel113.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel113.h
deleted file mode 100644 (file)
index 415c154..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL113_H_
-#define _NBKERNEL113_H_
-
-/*! \file  nb_kernel113.h
- *  \brief Nonbonded kernel 113 (Coul + LJ, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 113 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel113
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 113 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel113nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL113_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel114.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel114.c
deleted file mode 100644 (file)
index 2371187..0000000
+++ /dev/null
@@ -1,898 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel114.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel114
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel114(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv24*rinv24;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv24;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv34*rinv34;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv34;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv42*rinv42;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv42;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv43*rinv43;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv43;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-                rinvsq           = rinv44*rinv44;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv44;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 267 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel114nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel114nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv24;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv34;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv42;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv43;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv44;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 154 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel114.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel114.h
deleted file mode 100644 (file)
index 4605e88..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL114_H_
-#define _NBKERNEL114_H_
-
-/*! \file  nb_kernel114.h
- *  \brief Nonbonded kernel 114 (Coul + LJ, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 114 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel114
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 114 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel114nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL114_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel120.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel120.c
deleted file mode 100644 (file)
index f43e397..0000000
+++ /dev/null
@@ -1,453 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel120.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel120
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel120(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (vcoul+br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 64 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel120nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel120nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Inner loop uses 51 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel120.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel120.h
deleted file mode 100644 (file)
index c1f04c3..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL120_H_
-#define _NBKERNEL120_H_
-
-/*! \file  nb_kernel120.h
- *  \brief Nonbonded kernel 120 (Coul + Bham)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 120 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel120
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 120 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel120nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL120_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel121.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel121.c
deleted file mode 100644 (file)
index 22e4fe1..0000000
+++ /dev/null
@@ -1,575 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel121.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel121
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel121(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (vcoul+br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 117 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel121nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel121nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 82 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel121.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel121.h
deleted file mode 100644 (file)
index 53a80bb..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL121_H_
-#define _NBKERNEL121_H_
-
-/*! \file  nb_kernel121.h
- *  \brief Nonbonded kernel 121 (Coul + Bham, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 121 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel121
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 121 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel121nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL121_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel122.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel122.c
deleted file mode 100644 (file)
index ad8d288..0000000
+++ /dev/null
@@ -1,849 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel122.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel122
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel122(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (vcoul+br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv12*rinv12;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv12;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv13*rinv13;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv13;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 271 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel122nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel122nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv12;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv13;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 170 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel122.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel122.h
deleted file mode 100644 (file)
index f2c4f50..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL122_H_
-#define _NBKERNEL122_H_
-
-/*! \file  nb_kernel122.h
- *  \brief Nonbonded kernel 122 (Coul + Bham, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 122 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel122
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 122 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel122nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL122_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel123.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel123.c
deleted file mode 100644 (file)
index 5ddfe4c..0000000
+++ /dev/null
@@ -1,622 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel123.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel123
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel123(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-                rinvsq           = rinv41*rinv41;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv41;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 141 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel123nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel123nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv41;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 95 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel123.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel123.h
deleted file mode 100644 (file)
index 7dc75c9..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL123_H_
-#define _NBKERNEL123_H_
-
-/*! \file  nb_kernel123.h
- *  \brief Nonbonded kernel 123 (Coul + Bham, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 123 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel123
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 123 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel123nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL123_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel124.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel124.c
deleted file mode 100644 (file)
index 1623be5..0000000
+++ /dev/null
@@ -1,904 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel124.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel124
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel124(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv24*rinv24;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv24;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv34*rinv34;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv34;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv42*rinv42;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv42;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv43*rinv43;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv43;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-                rinvsq           = rinv44*rinv44;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv44;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 295 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel124nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel124nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv24;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv34;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv42;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv43;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv44;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 183 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel124.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel124.h
deleted file mode 100644 (file)
index aa99f5e..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL124_H_
-#define _NBKERNEL124_H_
-
-/*! \file  nb_kernel124.h
- *  \brief Nonbonded kernel 124 (Coul + Bham, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 124 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel124
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 124 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel124nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL124_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel130.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel130.c
deleted file mode 100644 (file)
index a69b3ce..0000000
+++ /dev/null
@@ -1,505 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel130.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel130
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel130(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = (vcoul)*rinvsq-((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 59 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel130nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel130nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Inner loop uses 36 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel130.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel130.h
deleted file mode 100644 (file)
index 630e05d..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL130_H_
-#define _NBKERNEL130_H_
-
-/*! \file  nb_kernel130.h
- *  \brief Nonbonded kernel 130 (Coul + Tab VdW)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 130 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel130
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 130 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel130nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL130_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel131.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel131.c
deleted file mode 100644 (file)
index f6c4539..0000000
+++ /dev/null
@@ -1,627 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel131.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel131
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel131(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = (vcoul)*rinvsq-((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 112 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel131nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel131nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 67 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel131.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel131.h
deleted file mode 100644 (file)
index 361beb2..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL131_H_
-#define _NBKERNEL131_H_
-
-/*! \file  nb_kernel131.h
- *  \brief Nonbonded kernel 131 (Coul + Tab VdW, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 131 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel131
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 131 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel131nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL131_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel132.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel132.c
deleted file mode 100644 (file)
index ba94c59..0000000
+++ /dev/null
@@ -1,901 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel132.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel132
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel132(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = (vcoul)*rinvsq-((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv12*rinv12;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv12;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv13*rinv13;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv13;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 266 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel132nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel132nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv12;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv13;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 155 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel132.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel132.h
deleted file mode 100644 (file)
index 939ea4b..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL132_H_
-#define _NBKERNEL132_H_
-
-/*! \file  nb_kernel132.h
- *  \brief Nonbonded kernel 132 (Coul + Tab VdW, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 132 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel132
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 132 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel132nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL132_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel133.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel133.c
deleted file mode 100644 (file)
index 1c42315..0000000
+++ /dev/null
@@ -1,673 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel133.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel133
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel133(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-                rinvsq           = rinv41*rinv41;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv41;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 134 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel133nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel133nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv41;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 80 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel133.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel133.h
deleted file mode 100644 (file)
index 8cd04e5..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL133_H_
-#define _NBKERNEL133_H_
-
-/*! \file  nb_kernel133.h
- *  \brief Nonbonded kernel 133 (Coul + Tab VdW, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 133 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel133
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 133 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel133nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL133_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel134.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel134.c
deleted file mode 100644 (file)
index b2e42cb..0000000
+++ /dev/null
@@ -1,955 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel134.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel134
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel134(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif 
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv24*rinv24;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv24;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv34*rinv34;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv34;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv42*rinv42;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv42;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv43*rinv43;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv43;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-                rinvsq           = rinv44*rinv44;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv44;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 288 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel134nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel134nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv24;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv34;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv42;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv43;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv44;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 168 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel134.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel134.h
deleted file mode 100644 (file)
index e60fa9a..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL134_H_
-#define _NBKERNEL134_H_
-
-/*! \file  nb_kernel134.h
- *  \brief Nonbonded kernel 134 (Coul + Tab VdW, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 134 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel134
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 134 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel134nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL134_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel200.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel200.c
deleted file mode 100644 (file)
index ad29fdc..0000000
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel200.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel200
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel200(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv11-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 33 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel200nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel200nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          iq;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 19 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 5 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel200.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel200.h
deleted file mode 100644 (file)
index c7307f2..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL200_H_
-#define _NBKERNEL200_H_
-
-/*! \file  nb_kernel200.h
- *  \brief Nonbonded kernel 200 (RF Coul)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 200 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel200
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 200 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel200nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL200_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel201.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel201.c
deleted file mode 100644 (file)
index 8d3517f..0000000
+++ /dev/null
@@ -1,541 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel201.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel201
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel201(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv11-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 98 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel201nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel201nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 56 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel201.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel201.h
deleted file mode 100644 (file)
index 5bb8a84..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL201_H_
-#define _NBKERNEL201_H_
-
-/*! \file  nb_kernel201.h
- *  \brief Nonbonded kernel 201 (RF Coul, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 201 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel201
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 201 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel201nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL201_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel202.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel202.c
deleted file mode 100644 (file)
index 197282f..0000000
+++ /dev/null
@@ -1,831 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel202.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel202
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel202(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv11-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv12*rinv12;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq12;      
-                vcoul            = qq*(rinv12+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv12-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv13*rinv13;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq13;      
-                vcoul            = qq*(rinv13+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv13-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv22-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv23-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv32-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv33-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 288 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel202nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel202nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq12;      
-                vcoul            = qq*(rinv12+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq13;      
-                vcoul            = qq*(rinv13+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 162 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel202.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel202.h
deleted file mode 100644 (file)
index 7b3203d..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL202_H_
-#define _NBKERNEL202_H_
-
-/*! \file  nb_kernel202.h
- *  \brief Nonbonded kernel 202 (RF Coul, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 202 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel202
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 202 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel202nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL202_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel203.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel203.c
deleted file mode 100644 (file)
index 31d6294..0000000
+++ /dev/null
@@ -1,541 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel203.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel203
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel203(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-                rinvsq           = rinv41*rinv41;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq41;      
-                vcoul            = qq*(rinv41+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv41-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 98 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel203nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel203nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq41;      
-                vcoul            = qq*(rinv41+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 56 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel203.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel203.h
deleted file mode 100644 (file)
index 1c7dafc..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL203_H_
-#define _NBKERNEL203_H_
-
-/*! \file  nb_kernel203.h
- *  \brief Nonbonded kernel 203 (RF Coul, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 203 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel203
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 203 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel203nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL203_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel204.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel204.c
deleted file mode 100644 (file)
index e9ba676..0000000
+++ /dev/null
@@ -1,831 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel204.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel204
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel204(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv22-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv23-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv24*rinv24;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq24;      
-                vcoul            = qq*(rinv24+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv24-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv32-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv33-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv34*rinv34;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq34;      
-                vcoul            = qq*(rinv34+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv34-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv42*rinv42;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq42;      
-                vcoul            = qq*(rinv42+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv42-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv43*rinv43;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq43;      
-                vcoul            = qq*(rinv43+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv43-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-                rinvsq           = rinv44*rinv44;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq44;      
-                vcoul            = qq*(rinv44+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv44-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 288 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel204nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel204nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq24;      
-                vcoul            = qq*(rinv24+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq34;      
-                vcoul            = qq*(rinv34+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq42;      
-                vcoul            = qq*(rinv42+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq43;      
-                vcoul            = qq*(rinv43+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq44;      
-                vcoul            = qq*(rinv44+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 162 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel204.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel204.h
deleted file mode 100644 (file)
index 0ee1497..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL204_H_
-#define _NBKERNEL204_H_
-
-/*! \file  nb_kernel204.h
- *  \brief Nonbonded kernel 204 (RF Coul, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 204 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel204
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 204 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel204nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL204_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel210.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel210.c
deleted file mode 100644 (file)
index f9db17a..0000000
+++ /dev/null
@@ -1,453 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel210.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel210
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel210(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (qq*(rinv11-2.0*krsq)+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 44 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel210nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel210nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Inner loop uses 27 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel210.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel210.h
deleted file mode 100644 (file)
index 97e4272..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL210_H_
-#define _NBKERNEL210_H_
-
-/*! \file  nb_kernel210.h
- *  \brief Nonbonded kernel 210 (RF Coul + LJ)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 210 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel210
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 210 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel210nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL210_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel211.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel211.c
deleted file mode 100644 (file)
index 98120c1..0000000
+++ /dev/null
@@ -1,579 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel211.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel211
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel211(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (qq*(rinv11-2.0*krsq)+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 109 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel211nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel211nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 64 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel211.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel211.h
deleted file mode 100644 (file)
index fb3ceb3..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL211_H_
-#define _NBKERNEL211_H_
-
-/*! \file  nb_kernel211.h
- *  \brief Nonbonded kernel 211 (RF Coul + LJ, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 211 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel211
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 211 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel211nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL211_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel212.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel212.c
deleted file mode 100644 (file)
index 79717cb..0000000
+++ /dev/null
@@ -1,865 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel212.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel212
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel212(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (qq*(rinv11-2.0*krsq)+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv12*rinv12;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq12;      
-                vcoul            = qq*(rinv12+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv12-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv13*rinv13;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq13;      
-                vcoul            = qq*(rinv13+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv13-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv22-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv23-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv32-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv33-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 299 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel212nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel212nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq12;      
-                vcoul            = qq*(rinv12+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq13;      
-                vcoul            = qq*(rinv13+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 170 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel212.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel212.h
deleted file mode 100644 (file)
index 2cb3689..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL212_H_
-#define _NBKERNEL212_H_
-
-/*! \file  nb_kernel212.h
- *  \brief Nonbonded kernel 212 (RF Coul + LJ, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 212 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel212
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 212 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel212nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL212_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel213.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel213.c
deleted file mode 100644 (file)
index 398af73..0000000
+++ /dev/null
@@ -1,624 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel213.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel213
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel213(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-                rinvsq           = rinv41*rinv41;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq41;      
-                vcoul            = qq*(rinv41+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv41-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 131 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel213nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel213nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq41;      
-                vcoul            = qq*(rinv41+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 75 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel213.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel213.h
deleted file mode 100644 (file)
index 9f26dfd..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL213_H_
-#define _NBKERNEL213_H_
-
-/*! \file  nb_kernel213.h
- *  \brief Nonbonded kernel 213 (RF Coul + LJ, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 213 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel213
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 213 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel213nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL213_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel214.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel214.c
deleted file mode 100644 (file)
index 8d604bc..0000000
+++ /dev/null
@@ -1,918 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel214.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel214
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel214(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv22-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv23-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv24*rinv24;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq24;      
-                vcoul            = qq*(rinv24+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv24-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv32-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv33-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv34*rinv34;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq34;      
-                vcoul            = qq*(rinv34+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv34-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv42*rinv42;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq42;      
-                vcoul            = qq*(rinv42+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv42-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv43*rinv43;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq43;      
-                vcoul            = qq*(rinv43+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv43-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-                rinvsq           = rinv44*rinv44;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq44;      
-                vcoul            = qq*(rinv44+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv44-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 321 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel214nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel214nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq24;      
-                vcoul            = qq*(rinv24+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq34;      
-                vcoul            = qq*(rinv34+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq42;      
-                vcoul            = qq*(rinv42+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq43;      
-                vcoul            = qq*(rinv43+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq44;      
-                vcoul            = qq*(rinv44+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 181 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel214.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel214.h
deleted file mode 100644 (file)
index 246665d..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL214_H_
-#define _NBKERNEL214_H_
-
-/*! \file  nb_kernel214.h
- *  \brief Nonbonded kernel 214 (RF Coul + LJ, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 214 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel214
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 214 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel214nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL214_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel220.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel220.c
deleted file mode 100644 (file)
index b1a366b..0000000
+++ /dev/null
@@ -1,457 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel220.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel220
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel220(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (qq*(rinv11-2.0*krsq)+br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 70 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel220nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel220nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Inner loop uses 54 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel220.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel220.h
deleted file mode 100644 (file)
index a1a443c..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL220_H_
-#define _NBKERNEL220_H_
-
-/*! \file  nb_kernel220.h
- *  \brief Nonbonded kernel 220 (RF Coul + Bham)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 220 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel220
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 220 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel220nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL220_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel221.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel221.c
deleted file mode 100644 (file)
index d9a5348..0000000
+++ /dev/null
@@ -1,583 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel221.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel221
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel221(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (qq*(rinv11-2.0*krsq)+br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 135 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel221nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel221nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 91 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel221.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel221.h
deleted file mode 100644 (file)
index e6d7b54..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL221_H_
-#define _NBKERNEL221_H_
-
-/*! \file  nb_kernel221.h
- *  \brief Nonbonded kernel 221 (RF Coul + Bham, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 221 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel221
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 221 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel221nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL221_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel222.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel222.c
deleted file mode 100644 (file)
index 1b33bda..0000000
+++ /dev/null
@@ -1,869 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel222.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel222
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel222(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (qq*(rinv11-2.0*krsq)+br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv12*rinv12;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq12;      
-                vcoul            = qq*(rinv12+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv12-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv13*rinv13;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq13;      
-                vcoul            = qq*(rinv13+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv13-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv22-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv23-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv32-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv33-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 325 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel222nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel222nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq12;      
-                vcoul            = qq*(rinv12+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq13;      
-                vcoul            = qq*(rinv13+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 197 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel222.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel222.h
deleted file mode 100644 (file)
index 16ae5c9..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL222_H_
-#define _NBKERNEL222_H_
-
-/*! \file  nb_kernel222.h
- *  \brief Nonbonded kernel 222 (RF Coul + Bham, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 222 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel222
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 222 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel222nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL222_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel223.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel223.c
deleted file mode 100644 (file)
index 6008257..0000000
+++ /dev/null
@@ -1,630 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel223.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel223
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel223(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-                rinvsq           = rinv41*rinv41;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq41;      
-                vcoul            = qq*(rinv41+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv41-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 159 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel223nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel223nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq41;      
-                vcoul            = qq*(rinv41+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 104 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel223.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel223.h
deleted file mode 100644 (file)
index ee732ca..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL223_H_
-#define _NBKERNEL223_H_
-
-/*! \file  nb_kernel223.h
- *  \brief Nonbonded kernel 223 (RF Coul + Bham, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 223 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel223
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 223 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel223nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL223_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel224.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel224.c
deleted file mode 100644 (file)
index c25a268..0000000
+++ /dev/null
@@ -1,924 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel224.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel224
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel224(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv22-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv23-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv24*rinv24;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq24;      
-                vcoul            = qq*(rinv24+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv24-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv32-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv33-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv34*rinv34;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq34;      
-                vcoul            = qq*(rinv34+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv34-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv42*rinv42;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq42;      
-                vcoul            = qq*(rinv42+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv42-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv43*rinv43;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq43;      
-                vcoul            = qq*(rinv43+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv43-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-                rinvsq           = rinv44*rinv44;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq44;      
-                vcoul            = qq*(rinv44+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv44-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 349 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel224nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel224nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq24;      
-                vcoul            = qq*(rinv24+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq34;      
-                vcoul            = qq*(rinv34+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq42;      
-                vcoul            = qq*(rinv42+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq43;      
-                vcoul            = qq*(rinv43+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq44;      
-                vcoul            = qq*(rinv44+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 210 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel224.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel224.h
deleted file mode 100644 (file)
index 3be3bc5..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL224_H_
-#define _NBKERNEL224_H_
-
-/*! \file  nb_kernel224.h
- *  \brief Nonbonded kernel 224 (RF Coul + Bham, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 224 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel224
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 224 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel224nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL224_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel230.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel230.c
deleted file mode 100644 (file)
index 88877fb..0000000
+++ /dev/null
@@ -1,509 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel230.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel230
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel230(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = (qq*(rinv11-2.0*krsq))*rinvsq-((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 65 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel230nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel230nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Inner loop uses 39 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel230.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel230.h
deleted file mode 100644 (file)
index e3f352c..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL230_H_
-#define _NBKERNEL230_H_
-
-/*! \file  nb_kernel230.h
- *  \brief Nonbonded kernel 230 (RF Coul + Tab VdW)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 230 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel230
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 230 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel230nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL230_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel231.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel231.c
deleted file mode 100644 (file)
index f7f226e..0000000
+++ /dev/null
@@ -1,635 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel231.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel231
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel231(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = (qq*(rinv11-2.0*krsq))*rinvsq-((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 130 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel231nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel231nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 76 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel231.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel231.h
deleted file mode 100644 (file)
index 7d4f88d..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL231_H_
-#define _NBKERNEL231_H_
-
-/*! \file  nb_kernel231.h
- *  \brief Nonbonded kernel 231 (RF Coul + Tab VdW, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 231 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel231
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 231 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel231nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL231_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel232.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel232.c
deleted file mode 100644 (file)
index 03b8370..0000000
+++ /dev/null
@@ -1,921 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel232.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel232
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel232(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = (qq*(rinv11-2.0*krsq))*rinvsq-((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv12*rinv12;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq12;      
-                vcoul            = qq*(rinv12+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv12-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv13*rinv13;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq13;      
-                vcoul            = qq*(rinv13+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv13-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv22-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv23-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv32-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv33-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 320 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel232nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel232nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq12;      
-                vcoul            = qq*(rinv12+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq13;      
-                vcoul            = qq*(rinv13+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 182 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel232.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel232.h
deleted file mode 100644 (file)
index e21236a..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL232_H_
-#define _NBKERNEL232_H_
-
-/*! \file  nb_kernel232.h
- *  \brief Nonbonded kernel 232 (RF Coul + Tab VdW, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 232 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel232
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 232 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel232nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL232_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel233.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel233.c
deleted file mode 100644 (file)
index 2c85ac8..0000000
+++ /dev/null
@@ -1,681 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel233.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel233
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel233(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-                rinvsq           = rinv41*rinv41;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq41;      
-                vcoul            = qq*(rinv41+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv41-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 152 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel233nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel233nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq41;      
-                vcoul            = qq*(rinv41+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 89 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel233.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel233.h
deleted file mode 100644 (file)
index 59001f9..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL233_H_
-#define _NBKERNEL233_H_
-
-/*! \file  nb_kernel233.h
- *  \brief Nonbonded kernel 233 (RF Coul + Tab VdW, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 233 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel233
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 233 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel233nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL233_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel234.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel234.c
deleted file mode 100644 (file)
index 9d2ddf3..0000000
+++ /dev/null
@@ -1,975 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel234.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel234
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel234(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv22-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv23-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv24*rinv24;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq24;      
-                vcoul            = qq*(rinv24+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv24-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv32-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv33-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv34*rinv34;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq34;      
-                vcoul            = qq*(rinv34+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv34-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv42*rinv42;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq42;      
-                vcoul            = qq*(rinv42+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv42-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv43*rinv43;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq43;      
-                vcoul            = qq*(rinv43+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv43-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-                rinvsq           = rinv44*rinv44;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq44;      
-                vcoul            = qq*(rinv44+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv44-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 342 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel234nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel234nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq24;      
-                vcoul            = qq*(rinv24+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq34;      
-                vcoul            = qq*(rinv34+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq42;      
-                vcoul            = qq*(rinv42+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq43;      
-                vcoul            = qq*(rinv43+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq44;      
-                vcoul            = qq*(rinv44+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 195 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel234.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel234.h
deleted file mode 100644 (file)
index 27f3f5b..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL234_H_
-#define _NBKERNEL234_H_
-
-/*! \file  nb_kernel234.h
- *  \brief Nonbonded kernel 234 (RF Coul + Tab VdW, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 234 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel234
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 234 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel234nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL234_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel300.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel300.c
deleted file mode 100644 (file)
index 4cbeac2..0000000
+++ /dev/null
@@ -1,451 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel300.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel300
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel300(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          iq;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 42 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel300nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel300nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          iq;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-               /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 26 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 5 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel300.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel300.h
deleted file mode 100644 (file)
index 99516e7..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL300_H_
-#define _NBKERNEL300_H_
-
-/*! \file  nb_kernel300.h
- *  \brief Nonbonded kernel 300 (Tab Coul)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 300 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel300
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 300 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel300nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL300_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel301.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel301.c
deleted file mode 100644 (file)
index da8de57..0000000
+++ /dev/null
@@ -1,639 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel301.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel301
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel301(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 125 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel301nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel301nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 77 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel301.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel301.h
deleted file mode 100644 (file)
index c28ace6..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL301_H_
-#define _NBKERNEL301_H_
-
-/*! \file  nb_kernel301.h
- *  \brief Nonbonded kernel 301 (Tab Coul, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 301 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel301
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 301 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel301nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL301_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel302.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel302.c
deleted file mode 100644 (file)
index 83ee5df..0000000
+++ /dev/null
@@ -1,1115 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel302.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel302
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel302(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq12*rinv12;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv12;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq13*rinv13;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv13;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv22;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv23;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv32;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv33;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 369 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel302nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel302nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq12*rinv12;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq13*rinv13;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 225 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel302.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel302.h
deleted file mode 100644 (file)
index b99d307..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL302_H_
-#define _NBKERNEL302_H_
-
-/*! \file  nb_kernel302.h
- *  \brief Nonbonded kernel 302 (Tab Coul, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 302 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel302
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 302 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel302nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL302_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel303.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel303.c
deleted file mode 100644 (file)
index d1a7e91..0000000
+++ /dev/null
@@ -1,639 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel303.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel303
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel303(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Calculate table index */
-                r                = rsq41*rinv41;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv41;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 125 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel303nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel303nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Calculate table index */
-                r                = rsq41*rinv41;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 77 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel303.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel303.h
deleted file mode 100644 (file)
index df17ff6..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL303_H_
-#define _NBKERNEL303_H_
-
-/*! \file  nb_kernel303.h
- *  \brief Nonbonded kernel 303 (Tab Coul, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 303 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel303
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 303 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel303nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL303_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel304.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel304.c
deleted file mode 100644 (file)
index 9c842d0..0000000
+++ /dev/null
@@ -1,1115 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel304.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel304
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel304(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv22;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv23;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq24*rinv24;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv24;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv32;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv33;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq34*rinv34;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv34;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq42*rinv42;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv42;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq43*rinv43;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv43;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Calculate table index */
-                r                = rsq44*rinv44;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv44;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 369 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel304nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel304nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-               /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq24*rinv24;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq34*rinv34;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq42*rinv42;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq43*rinv43;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Calculate table index */
-                r                = rsq44*rinv44;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 225 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel304.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel304.h
deleted file mode 100644 (file)
index cf5182c..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL304_H_
-#define _NBKERNEL304_H_
-
-/*! \file  nb_kernel304.h
- *  \brief Nonbonded kernel 304 (Tab Coul, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 304 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel304
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 304 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel304nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL304_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel310.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel310.c
deleted file mode 100644 (file)
index d9bac05..0000000
+++ /dev/null
@@ -1,491 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel310.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel310
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel310(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 55 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel310nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel310nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Inner loop uses 34 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel310.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel310.h
deleted file mode 100644 (file)
index 7341359..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL310_H_
-#define _NBKERNEL310_H_
-
-/*! \file  nb_kernel310.h
- *  \brief Nonbonded kernel 310 (Tab Coul + LJ)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 310 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel310
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 310 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel310nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL310_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel311.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel311.c
deleted file mode 100644 (file)
index a046fe3..0000000
+++ /dev/null
@@ -1,679 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel311.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel311
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel311(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 138 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel311nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel311nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 85 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel311.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel311.h
deleted file mode 100644 (file)
index 140691a..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL311_H_
-#define _NBKERNEL311_H_
-
-/*! \file  nb_kernel311.h
- *  \brief Nonbonded kernel 311 (Tab Coul + LJ, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 311 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel311
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 311 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel311nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL311_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel312.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel312.c
deleted file mode 100644 (file)
index 623b323..0000000
+++ /dev/null
@@ -1,1151 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel312.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel312
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel312(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq12*rinv12;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv12;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq13*rinv13;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv13;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv22;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv23;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv32;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv33;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 382 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel312nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel312nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq12*rinv12;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq13*rinv13;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 233 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel312.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel312.h
deleted file mode 100644 (file)
index e5849eb..0000000
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL312_H_
-#define _NBKERNEL312_H_
-
-/*! \file  nb_kernel312.h
- *  \brief Nonbonded kernel 312 (Tab Coul + LJ, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" 
-{
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 312 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel312
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 312 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel312nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL312_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel313.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel313.c
deleted file mode 100644 (file)
index c36a282..0000000
+++ /dev/null
@@ -1,723 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel313.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel313
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel313(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Calculate table index */
-                r                = rsq41*rinv41;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv41;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 158 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel313nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel313nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-               /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Calculate table index */
-                r                = rsq41*rinv41;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 96 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel313.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel313.h
deleted file mode 100644 (file)
index 63005e6..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL313_H_
-#define _NBKERNEL313_H_
-
-/*! \file  nb_kernel313.h
- *  \brief Nonbonded kernel 313 (Tab Coul + LJ, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 313 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel313
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 313 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel313nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL313_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel314.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel314.c
deleted file mode 100644 (file)
index 2116e24..0000000
+++ /dev/null
@@ -1,1203 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel314.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel314
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel314(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv22;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv23;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq24*rinv24;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv24;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv32;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv33;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq34*rinv34;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv34;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq42*rinv42;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv42;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq43*rinv43;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv43;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Calculate table index */
-                r                = rsq44*rinv44;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv44;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 402 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel314nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel314nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq24*rinv24;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq34*rinv34;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq42*rinv42;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq43*rinv43;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Calculate table index */
-                r                = rsq44*rinv44;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 244 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel314.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel314.h
deleted file mode 100644 (file)
index c783dfb..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL314_H_
-#define _NBKERNEL314_H_
-
-/*! \file  nb_kernel314.h
- *  \brief Nonbonded kernel 314 (Tab Coul + LJ, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 314 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel314
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 314 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel314nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL314_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel320.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel320.c
deleted file mode 100644 (file)
index d5c7179..0000000
+++ /dev/null
@@ -1,495 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel320.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel320
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel320(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq-((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 81 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel320nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel320nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Inner loop uses 61 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel320.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel320.h
deleted file mode 100644 (file)
index fa4c8e0..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL320_H_
-#define _NBKERNEL320_H_
-
-/*! \file  nb_kernel320.h
- *  \brief Nonbonded kernel 320 (Tab Coul + Bham)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 320 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel320
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 320 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel320nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL320_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel321.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel321.c
deleted file mode 100644 (file)
index 042786e..0000000
+++ /dev/null
@@ -1,683 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel321.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel321
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel321(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq-((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 164 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel321nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel321nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 112 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel321.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel321.h
deleted file mode 100644 (file)
index 076266b..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL321_H_
-#define _NBKERNEL321_H_
-
-/*! \file  nb_kernel321.h
- *  \brief Nonbonded kernel 321 (Tab Coul + Bham, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 321 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel321
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 321 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel321nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL321_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel322.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel322.c
deleted file mode 100644 (file)
index 874c797..0000000
+++ /dev/null
@@ -1,1155 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel322.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel322
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel322(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq-((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq12*rinv12;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv12;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq13*rinv13;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv13;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv22;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv23;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv32;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv33;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 408 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel322nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel322nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq12*rinv12;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq13*rinv13;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 260 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel322.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel322.h
deleted file mode 100644 (file)
index 46baec9..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL322_H_
-#define _NBKERNEL322_H_
-
-/*! \file  nb_kernel322.h
- *  \brief Nonbonded kernel 322 (Tab Coul + Bham, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 322 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel322
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 322 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel322nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL322_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel323.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel323.c
deleted file mode 100644 (file)
index da9d900..0000000
+++ /dev/null
@@ -1,729 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel323.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel323
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel323(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Calculate table index */
-                r                = rsq41*rinv41;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv41;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 186 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel323nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel323nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Calculate table index */
-                r                = rsq41*rinv41;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 125 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel323.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel323.h
deleted file mode 100644 (file)
index 0d52978..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL323_H_
-#define _NBKERNEL323_H_
-
-/*! \file  nb_kernel323.h
- *  \brief Nonbonded kernel 323 (Tab Coul + Bham, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 323 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel323
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 323 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel323nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL323_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel324.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel324.c
deleted file mode 100644 (file)
index 6cfa305..0000000
+++ /dev/null
@@ -1,1209 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel324.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel324
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel324(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv22;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv23;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq24*rinv24;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv24;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv32;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv33;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq34*rinv34;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv34;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq42*rinv42;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv42;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq43*rinv43;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv43;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Calculate table index */
-                r                = rsq44*rinv44;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv44;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 430 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel324nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel324nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq24*rinv24;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq34*rinv34;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq42*rinv42;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq43*rinv43;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Calculate table index */
-                r                = rsq44*rinv44;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 273 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel324.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel324.h
deleted file mode 100644 (file)
index d5585b7..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL324_H_
-#define _NBKERNEL324_H_
-
-/*! \file  nb_kernel324.h
- *  \brief Nonbonded kernel 324 (Tab Coul + Bham, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 324 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel324
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 324 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel324nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL324_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel330.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel330.c
deleted file mode 100644 (file)
index e88c093..0000000
+++ /dev/null
@@ -1,520 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel330.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel330
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel330(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijC+fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 68 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel330nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel330nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Inner loop uses 42 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel330.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel330.h
deleted file mode 100644 (file)
index 71813c6..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL330_H_
-#define _NBKERNEL330_H_
-
-/*! \file  nb_kernel330.h
- *  \brief Nonbonded kernel 330 (Tab Coul + Tab VdW)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 330 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel330
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 330 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel330nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL330_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel331.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel331.c
deleted file mode 100644 (file)
index 679ba80..0000000
+++ /dev/null
@@ -1,708 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel331.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel331
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel331(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijC+fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 151 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel331nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel331nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 93 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel331.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel331.h
deleted file mode 100644 (file)
index 51a52ee..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL331_H_
-#define _NBKERNEL331_H_
-
-/*! \file  nb_kernel331.h
- *  \brief Nonbonded kernel 331 (Tab Coul + Tab VdW, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 331 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel331
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 331 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel331nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL331_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel332.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel332.c
deleted file mode 100644 (file)
index 746e49d..0000000
+++ /dev/null
@@ -1,1180 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel332.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel332
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel332(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijC+fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq12*rinv12;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv12;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq13*rinv13;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv13;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv22;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv23;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv32;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv33;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 395 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel332nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel332nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq12*rinv12;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq13*rinv13;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 241 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel332.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel332.h
deleted file mode 100644 (file)
index 89bb6e9..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL332_H_
-#define _NBKERNEL332_H_
-
-/*! \file  nb_kernel332.h
- *  \brief Nonbonded kernel 332 (Tab Coul + Tab VdW, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 332 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel332
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 332 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel332nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL332_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel333.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel333.c
deleted file mode 100644 (file)
index 4b9cc4f..0000000
+++ /dev/null
@@ -1,774 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel333.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel333
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel333(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Calculate table index */
-                r                = rsq41*rinv41;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv41;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 179 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel333nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel333nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Calculate table index */
-                r                = rsq41*rinv41;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 110 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel333.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel333.h
deleted file mode 100644 (file)
index 80c6903..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL333_H_
-#define _NBKERNEL333_H_
-
-/*! \file  nb_kernel333.h
- *  \brief Nonbonded kernel 333 (Tab Coul + Tab VdW, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 333 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel333
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 333 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel333nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL333_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel334.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel334.c
deleted file mode 100644 (file)
index 6db5c8b..0000000
+++ /dev/null
@@ -1,1254 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel334.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel334
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel334(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv22;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv23;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq24*rinv24;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv24;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv32;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv33;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq34*rinv34;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv34;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq42*rinv42;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv42;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq43*rinv43;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv43;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Calculate table index */
-                r                = rsq44*rinv44;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv44;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 423 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel334nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel334nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq24*rinv24;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq34*rinv34;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq42*rinv42;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq43*rinv43;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Calculate table index */
-                r                = rsq44*rinv44;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 258 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel334.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel334.h
deleted file mode 100644 (file)
index 6f3dea0..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL334_H_
-#define _NBKERNEL334_H_
-
-/*! \file  nb_kernel334.h
- *  \brief Nonbonded kernel 334 (Tab Coul + Tab VdW, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 334 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel334
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 334 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel334nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL334_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel400.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel400.c
deleted file mode 100644 (file)
index fdc00b7..0000000
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-/* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
-
-#include "nb_kernel400.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel400
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel400(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          iq;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          isai,isaj,isaprod,gbscale,vgb,vgbtot;
-    real          dvdasum,dvdatmp,dvdaj,fgb;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-       gmx_gbdata_t *gbdata;
-       real *        gpol;
-       real          scale_gb;
-       
-       gbdata           = (gmx_gbdata_t *)work;
-       gpol             = gbdata->gpol;
-       
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;
-       scale_gb         = (1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent); 
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-       
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            isai             = invsqrta[ii];   
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;   
-                       vgbtot           = 0;
-            dvdasum          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                isaj             = invsqrta[jnr];  
-                isaprod          = isai*isaj;      
-                qq               = iq*charge[jnr]; 
-                vcoul            = qq*rinv11;      
-                fscal            = vcoul*rinv11;   
-                qq               = isaprod*(-qq)*scale_gb;  
-                gbscale          = isaprod*gbtabscale;
-
-                /* Tabulated Generalized-Born interaction */
-                dvdaj            = dvda[jnr];      
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*gbscale;      
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-                Y                = GBtab[nnn];     
-                F                = GBtab[nnn+1];   
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vgb              = qq*VV;          
-                fijC             = qq*FF*gbscale;  
-                dvdatmp          = -0.5*(vgb+fijC*r);
-                dvdasum          = dvdasum + dvdatmp;
-                dvda[jnr]        = dvdaj+dvdatmp*isaj*isaj;
-                vctot            = vctot + vcoul;  
-               vgbtot           = vgbtot + vgb;
-                fscal            = -(fijC-fscal)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 49 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            gpol[ggid]       = gpol[ggid] + vgbtot;
-            dvda[ii]         = dvda[ii] + dvdasum*isai*isai;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-       
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel400nf
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel400nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          iq;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          isai,isaj,isaprod,gbscale,vgb;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            isai             = invsqrta[ii];   
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                isaj             = invsqrta[jnr];  
-                isaprod          = isai*isaj;      
-                qq               = iq*charge[jnr]; 
-                vcoul            = qq*rinv11;      
-                qq               = isaprod*(-qq);  
-                gbscale          = isaprod*gbtabscale;
-
-                /* Tabulated Generalized-Born interaction */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*gbscale;      
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-                Y                = GBtab[nnn];     
-                F                = GBtab[nnn+1];   
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vgb              = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 29 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 5 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel400.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel400.h
deleted file mode 100644 (file)
index 301f63e..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL400_H_
-#define _NBKERNEL400_H_
-
-/*! \file  nb_kernel400.h
- *  \brief Nonbonded kernel 400 (GB Coul)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 400 with forces.
- *
- *  <b>Coulomb interaction:</b> Generalized Born<br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel400
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 400 without forces.
- *
- *  <b>Coulomb interaction:</b> Generalized Born<br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel400nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL400_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel410.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel410.c
deleted file mode 100644 (file)
index 3a4f624..0000000
+++ /dev/null
@@ -1,523 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-/* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
-
-#include "nb_kernel410.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel410
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel410(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          isai,isaj,isaprod,gbscale,vgb,vgbtot;
-    real          dvdasum,dvdatmp,dvdaj,fgb;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-       gmx_gbdata_t *gbdata;
-       real *        gpol;
-       real          scale_gb;
-       
-       gbdata           = (gmx_gbdata_t *)work;
-       gpol             = gbdata->gpol;
-       
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;     
-       scale_gb         = (1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent);  
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-   
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            isai             = invsqrta[ii];   
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;  
-            vgbtot           = 0;
-            dvdasum          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                isaj             = invsqrta[jnr];  
-                isaprod          = isai*isaj;      
-                qq               = iq*charge[jnr]; 
-                vcoul            = qq*rinv11;      
-                fscal            = vcoul*rinv11;   
-                qq               = isaprod*(-qq)*scale_gb;  
-                gbscale          = isaprod*gbtabscale;
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Tabulated Generalized-Born interaction */
-                dvdaj            = dvda[jnr];      
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*gbscale;      
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-                Y                = GBtab[nnn];     
-                F                = GBtab[nnn+1];   
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vgb              = qq*VV;          
-                fijC             = qq*FF*gbscale;  
-                dvdatmp          = -0.5*(vgb+fijC*r);
-                dvdasum          = dvdasum + dvdatmp;
-                dvda[jnr]        = dvdaj+dvdatmp*isaj*isaj;
-                vctot            = vctot + vcoul;  
-                vgbtot           = vgbtot + vgb;
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-                               
-                /* Inner loop uses 62 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            gpol[ggid]       = gpol[ggid] + vgbtot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-            dvda[ii]         = dvda[ii] + dvdasum*isai*isai;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 13 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-  
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel410nf
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel410nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          isai,isaj,isaprod,gbscale,vgb;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            isai             = invsqrta[ii];   
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                isaj             = invsqrta[jnr];  
-                isaprod          = isai*isaj;      
-                qq               = iq*charge[jnr]; 
-                vcoul            = qq*rinv11;      
-                qq               = isaprod*(-qq);  
-                gbscale          = isaprod*gbtabscale;
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Tabulated Generalized-Born interaction */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*gbscale;      
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-                Y                = GBtab[nnn];     
-                F                = GBtab[nnn+1];   
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vgb              = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Inner loop uses 37 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel410.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel410.h
deleted file mode 100644 (file)
index 24e3846..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL410_H_
-#define _NBKERNEL410_H_
-
-/*! \file  nb_kernel410.h
- *  \brief Nonbonded kernel 410 (GB Coul + LJ)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 410 with forces.
- *
- *  <b>Coulomb interaction:</b> Generalized Born<br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel410
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 410 without forces.
- *
- *  <b>Coulomb interaction:</b> Generalized Born<br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel410nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL410_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel420.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel420.c
deleted file mode 100644 (file)
index b499eaf..0000000
+++ /dev/null
@@ -1,518 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-/* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
-
-#include "nb_kernel420.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel420
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel420(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          isai,isaj,isaprod,gbscale,vgb;
-    real          dvdasum,dvdatmp,dvdaj,fgb;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            isai             = invsqrta[ii];   
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-            dvdasum          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                isaj             = invsqrta[jnr];  
-                isaprod          = isai*isaj;      
-                qq               = iq*charge[jnr]; 
-                vcoul            = qq*rinv11;      
-                fscal            = vcoul*rinv11;   
-                qq               = isaprod*(-qq);  
-                gbscale          = isaprod*gbtabscale;
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Tabulated Generalized-Born interaction */
-                dvdaj            = dvda[jnr];      
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*gbscale;      
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-                Y                = GBtab[nnn];     
-                F                = GBtab[nnn+1];   
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vgb              = qq*VV;          
-                fijC             = qq*FF*gbscale;  
-                dvdatmp          = -0.5*(vgb+fijC*r);
-                dvdasum          = dvdasum + dvdatmp;
-                dvda[jnr]        = dvdaj+dvdatmp*isaj*isaj;
-                vctot            = vctot + vcoul;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 88 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-            dvda[ii]         = dvda[ii] + dvdasum*isai*isai;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 13 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel420nf
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel420nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          isai,isaj,isaprod,gbscale,vgb;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            isai             = invsqrta[ii];   
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                isaj             = invsqrta[jnr];  
-                isaprod          = isai*isaj;      
-                qq               = iq*charge[jnr]; 
-                vcoul            = qq*rinv11;      
-                qq               = isaprod*(-qq);  
-                gbscale          = isaprod*gbtabscale;
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Tabulated Generalized-Born interaction */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*gbscale;      
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-                Y                = GBtab[nnn];     
-                F                = GBtab[nnn+1];   
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vgb              = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Inner loop uses 64 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel420.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel420.h
deleted file mode 100644 (file)
index b9529bf..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL420_H_
-#define _NBKERNEL420_H_
-
-/*! \file  nb_kernel420.h
- *  \brief Nonbonded kernel 420 (GB Coul + Bham)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 420 with forces.
- *
- *  <b>Coulomb interaction:</b> Generalized Born<br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel420
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real *        work);
-
-
-/*! \brief Nonbonded kernel 420 without forces.
- *
- *  \internal  Generated at compile time in either C or Fortran
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel420nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real *        work);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL420_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel430.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel430.c
deleted file mode 100644 (file)
index 37c113a..0000000
+++ /dev/null
@@ -1,571 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-/* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
-
-#include "nb_kernel430.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel430
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel430(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          fijD,fijR;
-    real          isai,isaj,isaprod,gbscale,vgb,vgbtot;
-    real          dvdasum,dvdatmp,dvdaj,fgb;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-       gmx_gbdata_t *gbdata;
-       real *        gpol;
-       real          scale_gb;
-       
-       gbdata           = (gmx_gbdata_t *)work;
-       gpol             = gbdata->gpol;
-       
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;   
-    scale_gb         = (1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent);
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            isai             = invsqrta[ii];   
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-            dvdasum          = 0;    
-           vgbtot           = 0;
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                isaj             = invsqrta[jnr];  
-                isaprod          = isai*isaj;      
-                qq               = iq*charge[jnr]; 
-                vcoul            = qq*rinv11;      
-                fscal            = vcoul*rinv11;   
-                qq               = isaprod*(-qq)*scale_gb;  
-                gbscale          = isaprod*gbtabscale;
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Tabulated Generalized-Born interaction */
-                dvdaj            = dvda[jnr];      
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*gbscale;      
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-                Y                = GBtab[nnn];     
-                F                = GBtab[nnn+1];   
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vgb              = qq*VV;          
-                fijC             = qq*FF*gbscale;  
-                dvdatmp          = -0.5*(vgb+fijC*r);
-                dvdasum          = dvdasum + dvdatmp;
-                dvda[jnr]        = dvdaj+dvdatmp*isaj*isaj;
-                vctot            = vctot + vcoul;  
-               vgbtot           = vgbtot + vgb;
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijD+fijR)*tabscale+fijC-fscal)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 80 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-                       gpol[ggid]       = gpol[ggid] + vgbtot;
-            dvda[ii]         = dvda[ii] + dvdasum*isai*isai;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 13 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel430nf
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel430nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          isai,isaj,isaprod,gbscale,vgb;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            isai             = invsqrta[ii];   
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                isaj             = invsqrta[jnr];  
-                isaprod          = isai*isaj;      
-                qq               = iq*charge[jnr]; 
-                vcoul            = qq*rinv11;      
-                qq               = isaprod*(-qq);  
-                gbscale          = isaprod*gbtabscale;
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Tabulated Generalized-Born interaction */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*gbscale;      
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-                Y                = GBtab[nnn];     
-                F                = GBtab[nnn+1];   
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vgb              = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Inner loop uses 49 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel430.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel430.h
deleted file mode 100644 (file)
index fec215b..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL430_H_
-#define _NBKERNEL430_H_
-
-/*! \file  nb_kernel430.h
- *  \brief Nonbonded kernel 430 (GB Coul + Tab VdW)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 430 with forces.
- *
- *  <b>Coulomb interaction:</b> Generalized Born<br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel430
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 430 without forces.
- *
- *  <b>Coulomb interaction:</b> Generalized Born<br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel430nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL430_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..1a01086
--- /dev/null
@@ -0,0 +1,438 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 81 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*81);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 74 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*74);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..19788cc
--- /dev/null
@@ -0,0 +1,686 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 165 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*165);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 150 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*150);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..3eda3ba
--- /dev/null
@@ -0,0 +1,1248 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r01*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq01*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq01*FF*vftabscale*rinv01;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r02*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq02*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq02*FF*vftabscale*rinv02;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq11*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq12*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq21*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq22*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 408 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*408);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r01*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq01*FF*vftabscale*rinv01;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r02*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq02*FF*vftabscale*rinv02;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 369 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*369);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..3cc3b21
--- /dev/null
@@ -0,0 +1,772 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r30*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq30*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq30*FF*vftabscale*rinv30;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 187 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*187);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r30*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq30*FF*vftabscale*rinv30;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 172 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*172);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..2b5f528
--- /dev/null
@@ -0,0 +1,1342 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq11*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq12*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r13*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq13*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq13*FF*vftabscale*rinv13;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq21*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq22*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r23*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq23*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq23*FF*vftabscale*rinv23;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r31*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq31*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq31*FF*vftabscale*rinv31;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r32*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq32*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq32*FF*vftabscale*rinv32;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r33*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq33*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq33*FF*vftabscale*rinv33;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 430 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*430);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r13*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq13*FF*vftabscale*rinv13;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r23*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq23*FF*vftabscale*rinv23;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r31*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq31*FF*vftabscale*rinv31;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r32*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq32*FF*vftabscale*rinv32;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r33*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq33*FF*vftabscale*rinv33;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 391 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*391);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..cbd63bd
--- /dev/null
@@ -0,0 +1,462 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 73 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*73);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 61 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*61);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..fc4d13a
--- /dev/null
@@ -0,0 +1,710 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 157 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*157);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 137 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*137);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..42b8b57
--- /dev/null
@@ -0,0 +1,1272 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r01*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq01*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq01*FF*vftabscale*rinv01;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r02*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq02*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq02*FF*vftabscale*rinv02;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq11*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq12*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq21*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq22*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 400 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*400);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r01*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq01*FF*vftabscale*rinv01;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r02*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq02*FF*vftabscale*rinv02;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 356 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*356);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..8a9584e
--- /dev/null
@@ -0,0 +1,808 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r30*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq30*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq30*FF*vftabscale*rinv30;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 181 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*181);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r30*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq30*FF*vftabscale*rinv30;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 161 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*161);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..24fcfde
--- /dev/null
@@ -0,0 +1,1378 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq11*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq12*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r13*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq13*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq13*FF*vftabscale*rinv13;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq21*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq22*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r23*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq23*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq23*FF*vftabscale*rinv23;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r31*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq31*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq31*FF*vftabscale*rinv31;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r32*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq32*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq32*FF*vftabscale*rinv32;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r33*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq33*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq33*FF*vftabscale*rinv33;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 424 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*424);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r13*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq13*FF*vftabscale*rinv13;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r23*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq23*FF*vftabscale*rinv23;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r31*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq31*FF*vftabscale*rinv31;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r32*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq32*FF*vftabscale*rinv32;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r33*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq33*FF*vftabscale*rinv33;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 380 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*380);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..7e32a82
--- /dev/null
@@ -0,0 +1,434 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 55 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*55);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 46 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*46);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..5aad06d
--- /dev/null
@@ -0,0 +1,682 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 139 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*139);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 122 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*122);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..29d1d79
--- /dev/null
@@ -0,0 +1,1244 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r01*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq01*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq01*FF*vftabscale*rinv01;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r02*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq02*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq02*FF*vftabscale*rinv02;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq11*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq12*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq21*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq22*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 382 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*382);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r01*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq01*FF*vftabscale*rinv01;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r02*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq02*FF*vftabscale*rinv02;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 341 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*341);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..bbaf3be
--- /dev/null
@@ -0,0 +1,762 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r30*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq30*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq30*FF*vftabscale*rinv30;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 158 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*158);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r30*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq30*FF*vftabscale*rinv30;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 141 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*141);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..8e939d2
--- /dev/null
@@ -0,0 +1,1332 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq11*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq12*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r13*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq13*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq13*FF*vftabscale*rinv13;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq21*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq22*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r23*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq23*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq23*FF*vftabscale*rinv23;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r31*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq31*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq31*FF*vftabscale*rinv31;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r32*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq32*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq32*FF*vftabscale*rinv32;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r33*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq33*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq33*FF*vftabscale*rinv33;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 401 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*401);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r13*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq13*FF*vftabscale*rinv13;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r23*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq23*FF*vftabscale*rinv23;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r31*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq31*FF*vftabscale*rinv31;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r32*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq32*FF*vftabscale*rinv32;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r33*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq33*FF*vftabscale*rinv33;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 360 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*360);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..9366948
--- /dev/null
@@ -0,0 +1,392 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 42 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 14 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*14 + inneriter*42);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 38 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*38);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..523f481
--- /dev/null
@@ -0,0 +1,640 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 126 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*31 + inneriter*126);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 114 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*30 + inneriter*114);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..90b1757
--- /dev/null
@@ -0,0 +1,1202 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r01*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq01*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq01*FF*vftabscale*rinv01;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r02*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq02*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq02*FF*vftabscale*rinv02;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq11*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq12*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq21*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq22*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 369 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*31 + inneriter*369);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r01*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq01*FF*vftabscale*rinv01;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r02*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq02*FF*vftabscale*rinv02;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 333 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*30 + inneriter*333);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..a661b19
--- /dev/null
@@ -0,0 +1,640 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r30*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq30*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq30*FF*vftabscale*rinv30;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 126 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*31 + inneriter*126);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r30*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq30*FF*vftabscale*rinv30;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 114 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*30 + inneriter*114);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..02fbd64
--- /dev/null
@@ -0,0 +1,1202 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq11*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq12*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r13*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq13*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq13*FF*vftabscale*rinv13;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq21*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq22*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r23*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq23*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq23*FF*vftabscale*rinv23;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r31*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq31*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq31*FF*vftabscale*rinv31;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r32*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq32*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq32*FF*vftabscale*rinv32;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r33*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq33*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq33*FF*vftabscale*rinv33;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 369 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*31 + inneriter*369);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r13*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq13*FF*vftabscale*rinv13;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r23*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq23*FF*vftabscale*rinv23;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r31*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq31*FF*vftabscale*rinv31;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r32*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq32*FF*vftabscale*rinv32;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r33*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq33*FF*vftabscale*rinv33;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 333 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*30 + inneriter*333);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..f7be552
--- /dev/null
@@ -0,0 +1,402 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 67 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*67);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 63 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*63);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..32bdada
--- /dev/null
@@ -0,0 +1,598 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 123 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*123);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 117 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*117);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..4fe9979
--- /dev/null
@@ -0,0 +1,1004 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq01*rinv01;
+            felec            = velec*rinvsq01;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq02*rinv02;
+            felec            = velec*rinvsq02;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 282 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*282);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq01*rinv01;
+            felec            = velec*rinvsq01;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq02*rinv02;
+            felec            = velec*rinvsq02;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 270 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*270);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..03af0bb
--- /dev/null
@@ -0,0 +1,682 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq30*rinv30;
+            felec            = velec*rinvsq30;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 145 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*145);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq30*rinv30;
+            felec            = velec*rinvsq30;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 139 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*139);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..8274b81
--- /dev/null
@@ -0,0 +1,1096 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq13*rinv13;
+            felec            = velec*rinvsq13;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq23*rinv23;
+            felec            = velec*rinvsq23;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq31*rinv31;
+            felec            = velec*rinvsq31;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq32*rinv32;
+            felec            = velec*rinvsq32;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq33*rinv33;
+            felec            = velec*rinvsq33;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 304 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*304);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq13*rinv13;
+            felec            = velec*rinvsq13;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq23*rinv23;
+            felec            = velec*rinvsq23;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq31*rinv31;
+            felec            = velec*rinvsq31;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq32*rinv32;
+            felec            = velec*rinvsq32;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq33*rinv33;
+            felec            = velec*rinvsq33;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 292 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*292);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..f6a33ac
--- /dev/null
@@ -0,0 +1,454 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 62 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*62);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 53 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*53);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..300dc84
--- /dev/null
@@ -0,0 +1,650 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 118 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*118);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 107 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*107);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..4023e7a
--- /dev/null
@@ -0,0 +1,1056 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq01*rinv01;
+            felec            = velec*rinvsq01;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq02*rinv02;
+            felec            = velec*rinvsq02;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 277 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*277);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq01*rinv01;
+            felec            = velec*rinvsq01;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq02*rinv02;
+            felec            = velec*rinvsq02;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 260 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*260);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..38fe3d3
--- /dev/null
@@ -0,0 +1,732 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq30*rinv30;
+            felec            = velec*rinvsq30;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 139 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*139);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq30*rinv30;
+            felec            = velec*rinvsq30;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 128 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*128);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..c1ddbc0
--- /dev/null
@@ -0,0 +1,1146 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq13*rinv13;
+            felec            = velec*rinvsq13;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq23*rinv23;
+            felec            = velec*rinvsq23;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq31*rinv31;
+            felec            = velec*rinvsq31;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq32*rinv32;
+            felec            = velec*rinvsq32;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq33*rinv33;
+            felec            = velec*rinvsq33;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 298 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*298);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq13*rinv13;
+            felec            = velec*rinvsq13;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq23*rinv23;
+            felec            = velec*rinvsq23;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq31*rinv31;
+            felec            = velec*rinvsq31;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq32*rinv32;
+            felec            = velec*rinvsq32;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq33*rinv33;
+            felec            = velec*rinvsq33;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 281 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*281);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..f256bcf
--- /dev/null
@@ -0,0 +1,394 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 40 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*40);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 34 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*34);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..a04f56e
--- /dev/null
@@ -0,0 +1,590 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 96 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*96);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 88 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*88);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..fdbe63f
--- /dev/null
@@ -0,0 +1,996 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq01*rinv01;
+            felec            = velec*rinvsq01;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq02*rinv02;
+            felec            = velec*rinvsq02;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 255 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*255);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq01*rinv01;
+            felec            = velec*rinvsq01;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq02*rinv02;
+            felec            = velec*rinvsq02;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 241 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*241);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..a51c687
--- /dev/null
@@ -0,0 +1,672 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq30*rinv30;
+            felec            = velec*rinvsq30;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 116 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*116);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq30*rinv30;
+            felec            = velec*rinvsq30;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 108 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*108);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..b78968c
--- /dev/null
@@ -0,0 +1,1086 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq13*rinv13;
+            felec            = velec*rinvsq13;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq23*rinv23;
+            felec            = velec*rinvsq23;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq31*rinv31;
+            felec            = velec*rinvsq31;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq32*rinv32;
+            felec            = velec*rinvsq32;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq33*rinv33;
+            felec            = velec*rinvsq33;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 275 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*275);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq13*rinv13;
+            felec            = velec*rinvsq13;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq23*rinv23;
+            felec            = velec*rinvsq23;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq31*rinv31;
+            felec            = velec*rinvsq31;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq32*rinv32;
+            felec            = velec*rinvsq32;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq33*rinv33;
+            felec            = velec*rinvsq33;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 261 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*261);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..36bef38
--- /dev/null
@@ -0,0 +1,356 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 28 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 14 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*14 + inneriter*28);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 27 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*27);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..892270f
--- /dev/null
@@ -0,0 +1,552 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 84 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*31 + inneriter*84);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 81 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*30 + inneriter*81);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..81b1831
--- /dev/null
@@ -0,0 +1,958 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq01*rinv01;
+            felec            = velec*rinvsq01;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq02*rinv02;
+            felec            = velec*rinvsq02;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 243 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*31 + inneriter*243);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq01*rinv01;
+            felec            = velec*rinvsq01;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq02*rinv02;
+            felec            = velec*rinvsq02;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 234 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*30 + inneriter*234);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..ebb7ac0
--- /dev/null
@@ -0,0 +1,552 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq30*rinv30;
+            felec            = velec*rinvsq30;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 84 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*31 + inneriter*84);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq30*rinv30;
+            felec            = velec*rinvsq30;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 81 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*30 + inneriter*81);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..cb255fc
--- /dev/null
@@ -0,0 +1,958 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq13*rinv13;
+            felec            = velec*rinvsq13;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq23*rinv23;
+            felec            = velec*rinvsq23;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq31*rinv31;
+            felec            = velec*rinvsq31;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq32*rinv32;
+            felec            = velec*rinvsq32;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq33*rinv33;
+            felec            = velec*rinvsq33;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 243 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*31 + inneriter*243);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq13*rinv13;
+            felec            = velec*rinvsq13;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq23*rinv23;
+            felec            = velec*rinvsq23;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq31*rinv31;
+            felec            = velec*rinvsq31;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq32*rinv32;
+            felec            = velec*rinvsq32;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq33*rinv33;
+            felec            = velec*rinvsq33;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 234 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*30 + inneriter*234);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..16f6b23
--- /dev/null
@@ -0,0 +1,454 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 111 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*111);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 69 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*69);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..956f68d
--- /dev/null
@@ -0,0 +1,702 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 195 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*195);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 137 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*137);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..249d2b8
--- /dev/null
@@ -0,0 +1,1264 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*((rinv01-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*((rinv02-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*((rinv11-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*((rinv12-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*((rinv21-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*((rinv22-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 438 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*438);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 332 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*332);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..c3619b7
--- /dev/null
@@ -0,0 +1,800 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*((rinv30-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 218 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*218);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 160 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*160);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..bfe65ef
--- /dev/null
@@ -0,0 +1,1370 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*((rinv11-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*((rinv12-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*((rinv13-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*((rinv21-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*((rinv22-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*((rinv23-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*((rinv31-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*((rinv32-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*((rinv33-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 461 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*461);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 355 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*355);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..88c102f
--- /dev/null
@@ -0,0 +1,450 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 59 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*59);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 41 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*41);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..6cd740f
--- /dev/null
@@ -0,0 +1,698 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 143 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*143);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 109 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*109);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..9cdb942
--- /dev/null
@@ -0,0 +1,1260 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*((rinv01-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*((rinv02-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*((rinv11-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*((rinv12-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*((rinv21-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*((rinv22-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 386 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*386);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 304 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*304);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..3e87257
--- /dev/null
@@ -0,0 +1,790 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*((rinv30-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 163 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*163);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 129 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*129);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..2cb43bd
--- /dev/null
@@ -0,0 +1,1360 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*((rinv11-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*((rinv12-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*((rinv13-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*((rinv21-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*((rinv22-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*((rinv23-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*((rinv31-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*((rinv32-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*((rinv33-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 406 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*406);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 324 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*324);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..664442f
--- /dev/null
@@ -0,0 +1,406 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 42 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 14 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*14 + inneriter*42);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 34 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*34);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..ab8259c
--- /dev/null
@@ -0,0 +1,654 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 126 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*31 + inneriter*126);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 102 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*30 + inneriter*102);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..ff0122b
--- /dev/null
@@ -0,0 +1,1216 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*((rinv01-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*((rinv02-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*((rinv11-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*((rinv12-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*((rinv21-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*((rinv22-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 369 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*31 + inneriter*369);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 297 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*30 + inneriter*297);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..e760e22
--- /dev/null
@@ -0,0 +1,654 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*((rinv30-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 126 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*31 + inneriter*126);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 102 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*30 + inneriter*102);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..6565b71
--- /dev/null
@@ -0,0 +1,1216 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*((rinv11-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*((rinv12-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*((rinv13-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*((rinv21-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*((rinv22-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*((rinv23-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*((rinv31-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*((rinv32-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*((rinv33-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 369 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*31 + inneriter*369);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 297 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*30 + inneriter*297);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..fe8ab28
--- /dev/null
@@ -0,0 +1,497 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            vvdw            *= sw;
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 101 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*101);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 97 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*97);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..0eca1c6
--- /dev/null
@@ -0,0 +1,791 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            vvdw            *= sw;
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 219 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*219);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 211 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*211);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..e0002e0
--- /dev/null
@@ -0,0 +1,1491 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            vvdw            *= sw;
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            d                = r01-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            d                = r02-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 564 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*564);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            d                = r01-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            d                = r02-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 544 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*544);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..76c59f2
--- /dev/null
@@ -0,0 +1,907 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            d                = r30-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 256 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*256);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            d                = r30-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 248 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*248);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..743fb40
--- /dev/null
@@ -0,0 +1,1615 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            d                = r13-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            d                = r23-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            d                = r31-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            d                = r32-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            d                = r33-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 601 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*601);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            d                = r13-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            d                = r23-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            d                = r31-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            d                = r32-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            d                = r33-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 581 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*581);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..07159bf
--- /dev/null
@@ -0,0 +1,495 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            vvdw            *= sw;
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 75 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*75);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 71 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*71);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..b49875a
--- /dev/null
@@ -0,0 +1,789 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            vvdw            *= sw;
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 193 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*193);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 185 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*185);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..3634b5d
--- /dev/null
@@ -0,0 +1,1489 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            vvdw            *= sw;
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            d                = r01-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            d                = r02-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 538 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*538);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            d                = r01-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            d                = r02-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 518 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*518);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..333cf6c
--- /dev/null
@@ -0,0 +1,905 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            d                = r30-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 230 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*230);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            d                = r30-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 222 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*222);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..0872f4d
--- /dev/null
@@ -0,0 +1,1613 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            d                = r13-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            d                = r23-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            d                = r31-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            d                = r32-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            d                = r33-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 575 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*575);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            d                = r13-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            d                = r23-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            d                = r31-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            d                = r32-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            d                = r33-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 555 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*555);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..f685701
--- /dev/null
@@ -0,0 +1,451 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 59 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 14 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*14 + inneriter*59);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 57 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*57);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..7f6e7b5
--- /dev/null
@@ -0,0 +1,745 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 177 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*31 + inneriter*177);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 171 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*30 + inneriter*171);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..f6f0179
--- /dev/null
@@ -0,0 +1,1445 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            d                = r01-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            d                = r02-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 522 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*31 + inneriter*522);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            d                = r01-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            d                = r02-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 504 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*30 + inneriter*504);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..48f7d36
--- /dev/null
@@ -0,0 +1,745 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            d                = r30-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 177 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*31 + inneriter*177);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            d                = r30-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 171 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*30 + inneriter*171);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..6039b2e
--- /dev/null
@@ -0,0 +1,1445 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            d                = r13-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            d                = r23-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            d                = r31-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            d                = r32-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            d                = r33-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 522 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*31 + inneriter*522);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            d                = r13-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            d                = r23-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            d                = r31-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            d                = r32-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            d                = r33-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 504 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*30 + inneriter*504);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..a04671a
--- /dev/null
@@ -0,0 +1,430 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 79 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*79);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 69 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*69);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..92aef65
--- /dev/null
@@ -0,0 +1,658 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 161 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*161);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 137 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*137);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..984892f
--- /dev/null
@@ -0,0 +1,1160 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 398 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*398);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 332 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*332);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..ff5f280
--- /dev/null
@@ -0,0 +1,746 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 184 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*184);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 160 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*160);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..1757d44
--- /dev/null
@@ -0,0 +1,1256 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 421 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*421);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 355 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*355);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..679e890
--- /dev/null
@@ -0,0 +1,482 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 74 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*74);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 59 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*59);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..fb05c2c
--- /dev/null
@@ -0,0 +1,710 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 156 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*156);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 127 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*127);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..a71be52
--- /dev/null
@@ -0,0 +1,1212 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 393 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*393);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 322 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*322);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..f691cfc
--- /dev/null
@@ -0,0 +1,796 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 178 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*178);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 149 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*149);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..ade4412
--- /dev/null
@@ -0,0 +1,1306 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 415 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*415);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 344 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*344);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..83beeaf
--- /dev/null
@@ -0,0 +1,426 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 53 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*53);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 41 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*41);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..053a2f3
--- /dev/null
@@ -0,0 +1,654 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 135 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*135);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 109 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*109);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..0697581
--- /dev/null
@@ -0,0 +1,1156 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 372 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*372);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 304 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*304);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..3c912ed
--- /dev/null
@@ -0,0 +1,736 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 155 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*155);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 129 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*129);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..0079058
--- /dev/null
@@ -0,0 +1,1246 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 392 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*392);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 324 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*324);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..dcb5653
--- /dev/null
@@ -0,0 +1,388 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 41 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 14 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*14 + inneriter*41);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 34 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*34);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..161a335
--- /dev/null
@@ -0,0 +1,616 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 123 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*31 + inneriter*123);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 102 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*30 + inneriter*102);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..3e9e4ec
--- /dev/null
@@ -0,0 +1,1118 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 360 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*31 + inneriter*360);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 297 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*30 + inneriter*297);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..c690477
--- /dev/null
@@ -0,0 +1,616 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 123 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*31 + inneriter*123);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 102 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*30 + inneriter*102);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..035931c
--- /dev/null
@@ -0,0 +1,1118 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 360 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*31 + inneriter*360);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 297 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*30 + inneriter*297);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..92b5b6b
--- /dev/null
@@ -0,0 +1,491 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c
+ * Electrostatics interaction: GeneralizedBorn
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        isai0            = invsqrta[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vgbsum           = 0.0;
+        vvdwsum          = 0.0;
+        dvdasum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            isaj0           = invsqrta[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai0*isaj0;
+            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+0];
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r00*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r00);
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
+            velec            = qq00*rinv00;
+            felec            = (velec*rinv00-fgb)*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vgbsum          += vgb;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 97 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_polarization[ggid] += vgbsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+        dvda[nri]                   = dvda[nri] + dvdasum*isai0*isai0;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 16 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*16 + inneriter*97);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c
+ * Electrostatics interaction: GeneralizedBorn
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        isai0            = invsqrta[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        dvdasum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            isaj0           = invsqrta[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai0*isaj0;
+            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+0];
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r00*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r00);
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
+            velec            = qq00*rinv00;
+            felec            = (velec*rinv00-fgb)*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 92 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        dvda[nri]                   = dvda[nri] + dvdasum*isai0*isai0;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*92);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..e436b3e
--- /dev/null
@@ -0,0 +1,533 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c
+ * Electrostatics interaction: GeneralizedBorn
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        isai0            = invsqrta[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vgbsum           = 0.0;
+        vvdwsum          = 0.0;
+        dvdasum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            isaj0           = invsqrta[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai0*isaj0;
+            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+0];
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r00*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r00);
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
+            velec            = qq00*rinv00;
+            felec            = (velec*rinv00-fgb)*rinv00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vgbsum          += vgb;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 91 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_polarization[ggid] += vgbsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+        dvda[nri]                   = dvda[nri] + dvdasum*isai0*isai0;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 16 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*16 + inneriter*91);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c
+ * Electrostatics interaction: GeneralizedBorn
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        isai0            = invsqrta[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        dvdasum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            isaj0           = invsqrta[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai0*isaj0;
+            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+0];
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r00*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r00);
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
+            velec            = qq00*rinv00;
+            felec            = (velec*rinv00-fgb)*rinv00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 81 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        dvda[nri]                   = dvda[nri] + dvdasum*isai0*isai0;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*81);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwLJ_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwLJ_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..1d61f4f
--- /dev/null
@@ -0,0 +1,487 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c
+ * Electrostatics interaction: GeneralizedBorn
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        isai0            = invsqrta[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vgbsum           = 0.0;
+        vvdwsum          = 0.0;
+        dvdasum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            isaj0           = invsqrta[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai0*isaj0;
+            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+0];
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r00*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r00);
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
+            velec            = qq00*rinv00;
+            felec            = (velec*rinv00-fgb)*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vgbsum          += vgb;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 71 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_polarization[ggid] += vgbsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+        dvda[nri]                   = dvda[nri] + dvdasum*isai0*isai0;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 16 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*16 + inneriter*71);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c
+ * Electrostatics interaction: GeneralizedBorn
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        isai0            = invsqrta[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        dvdasum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            isaj0           = invsqrta[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai0*isaj0;
+            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+0];
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r00*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r00);
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
+            velec            = qq00*rinv00;
+            felec            = (velec*rinv00-fgb)*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 64 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        dvda[nri]                   = dvda[nri] + dvdasum*isai0*isai0;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*64);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwNone_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwNone_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..0ef6e30
--- /dev/null
@@ -0,0 +1,445 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c
+ * Electrostatics interaction: GeneralizedBorn
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        isai0            = invsqrta[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vgbsum           = 0.0;
+        dvdasum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            isaj0           = invsqrta[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai0*isaj0;
+            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+0];
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r00*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r00);
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
+            velec            = qq00*rinv00;
+            felec            = (velec*rinv00-fgb)*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vgbsum          += vgb;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 58 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_polarization[ggid] += vgbsum;
+        dvda[nri]                   = dvda[nri] + dvdasum*isai0*isai0;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*15 + inneriter*58);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c
+ * Electrostatics interaction: GeneralizedBorn
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        isai0            = invsqrta[inr+0];
+
+        dvdasum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            isaj0           = invsqrta[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai0*isaj0;
+            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+0];
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r00*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r00);
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
+            velec            = qq00*rinv00;
+            felec            = (velec*rinv00-fgb)*rinv00;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 56 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        dvda[nri]                   = dvda[nri] + dvdasum*isai0*isai0;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*56);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..97ce578
--- /dev/null
@@ -0,0 +1,399 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_VF_c
+ * Electrostatics interaction: None
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    rcutoff          = fr->rvdw;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 92 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*13 + inneriter*92);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c
+ * Electrostatics interaction: None
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    rcutoff          = fr->rvdw;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 58 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 12 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*12 + inneriter*58);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..d5c66d2
--- /dev/null
@@ -0,0 +1,437 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c
+ * Electrostatics interaction: None
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    rcutoff          = fr->rvdw;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 79 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*13 + inneriter*79);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c
+ * Electrostatics interaction: None
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    rcutoff          = fr->rvdw;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 77 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 12 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*12 + inneriter*77);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBham_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBham_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..434429e
--- /dev/null
@@ -0,0 +1,377 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwBham_GeomP1P1_VF_c
+ * Electrostatics interaction: None
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecNone_VdwBham_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 61 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*13 + inneriter*61);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwBham_GeomP1P1_F_c
+ * Electrostatics interaction: None
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecNone_VdwBham_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 58 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 12 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*12 + inneriter*58);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..b841a2b
--- /dev/null
@@ -0,0 +1,425 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_c
+ * Electrostatics interaction: None
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 55 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*13 + inneriter*55);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_c
+ * Electrostatics interaction: None
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 47 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 12 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*12 + inneriter*47);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..d5b5b1d
--- /dev/null
@@ -0,0 +1,387 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_c
+ * Electrostatics interaction: None
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    rcutoff          = fr->rvdw;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinvsq00         = 1.0/rsq00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 37 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*13 + inneriter*37);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_c
+ * Electrostatics interaction: None
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    rcutoff          = fr->rvdw;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinvsq00         = 1.0/rsq00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 27 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 12 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*12 + inneriter*27);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..0fc2732
--- /dev/null
@@ -0,0 +1,435 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_c
+ * Electrostatics interaction: None
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    rcutoff          = fr->rvdw;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 53 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*13 + inneriter*53);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_c
+ * Electrostatics interaction: None
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    rcutoff          = fr->rvdw;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 51 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 12 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*12 + inneriter*51);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJ_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJ_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..2e75940
--- /dev/null
@@ -0,0 +1,365 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_c
+ * Electrostatics interaction: None
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinvsq00         = 1.0/rsq00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 32 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*13 + inneriter*32);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_c
+ * Electrostatics interaction: None
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinvsq00         = 1.0/rsq00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 27 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 12 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*12 + inneriter*27);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..742e123
--- /dev/null
@@ -0,0 +1,431 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 102 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*102);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 63 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*63);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..90d7032
--- /dev/null
@@ -0,0 +1,645 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 166 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*166);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 117 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*117);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..83d23be
--- /dev/null
@@ -0,0 +1,1105 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 349 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*349);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 270 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*270);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..8235742
--- /dev/null
@@ -0,0 +1,739 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 188 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*188);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 139 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*139);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..b93632a
--- /dev/null
@@ -0,0 +1,1207 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 371 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*371);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 292 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*292);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..d0f7070
--- /dev/null
@@ -0,0 +1,469 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 89 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*89);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 82 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*82);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..d818006
--- /dev/null
@@ -0,0 +1,683 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 153 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*153);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 136 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*136);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..891995b
--- /dev/null
@@ -0,0 +1,1143 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 336 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*336);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 289 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*289);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..bee93e5
--- /dev/null
@@ -0,0 +1,777 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 175 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*175);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 158 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*158);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..a5d8511
--- /dev/null
@@ -0,0 +1,1245 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 358 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*358);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 311 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*311);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..8d00b31
--- /dev/null
@@ -0,0 +1,477 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 66 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*66);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 53 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*53);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..1e55e56
--- /dev/null
@@ -0,0 +1,691 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 130 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*130);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 107 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*107);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..1a1ef76
--- /dev/null
@@ -0,0 +1,1151 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 313 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*313);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 260 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*260);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..967a003
--- /dev/null
@@ -0,0 +1,773 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 151 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*151);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 128 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*128);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..3c6532a
--- /dev/null
@@ -0,0 +1,1241 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 334 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*334);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 281 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*281);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..396f264
--- /dev/null
@@ -0,0 +1,423 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 49 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*49);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 34 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*34);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..41e87fd
--- /dev/null
@@ -0,0 +1,637 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 113 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*113);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 88 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*88);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..c7d255b
--- /dev/null
@@ -0,0 +1,1097 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 296 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*296);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 241 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*241);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..3a290c4
--- /dev/null
@@ -0,0 +1,729 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 133 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*133);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 108 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*108);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..7d23b9f
--- /dev/null
@@ -0,0 +1,1197 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 316 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*316);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 261 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*261);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..fbbf0bf
--- /dev/null
@@ -0,0 +1,467 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 63 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*63);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 56 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*56);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..653589a
--- /dev/null
@@ -0,0 +1,681 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 127 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*127);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 110 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*110);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..09f9e6e
--- /dev/null
@@ -0,0 +1,1141 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 310 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*310);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 263 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*263);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..ad6d1b6
--- /dev/null
@@ -0,0 +1,775 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 149 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*149);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 132 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*132);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..3262295
--- /dev/null
@@ -0,0 +1,1243 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 332 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*332);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 285 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*285);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..b200f3a
--- /dev/null
@@ -0,0 +1,379 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            qq00             = iq0*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 32 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 14 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*14 + inneriter*32);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            qq00             = iq0*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 27 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*27);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..c53d964
--- /dev/null
@@ -0,0 +1,593 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            qq00             = iq0*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 96 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*31 + inneriter*96);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            qq00             = iq0*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 81 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*30 + inneriter*81);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..8c8af55
--- /dev/null
@@ -0,0 +1,1053 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 279 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*31 + inneriter*279);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 234 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*30 + inneriter*234);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..e8a4b2c
--- /dev/null
@@ -0,0 +1,593 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 96 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*31 + inneriter*96);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 81 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*30 + inneriter*81);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..4f6c8d7
--- /dev/null
@@ -0,0 +1,1053 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 279 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*31 + inneriter*279);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 234 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*30 + inneriter*234);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..3af1e9d
--- /dev/null
@@ -0,0 +1,407 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 71 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*71);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 63 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*63);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..e59218c
--- /dev/null
@@ -0,0 +1,601 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 135 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*135);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 117 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*117);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..848ef5c
--- /dev/null
@@ -0,0 +1,1001 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 318 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*318);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 270 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*270);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..c45c408
--- /dev/null
@@ -0,0 +1,685 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 157 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*157);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 139 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*139);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..4ec58cc
--- /dev/null
@@ -0,0 +1,1093 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 340 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*340);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 292 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*292);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..6c071ee
--- /dev/null
@@ -0,0 +1,459 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 66 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*66);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 53 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*53);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..24a3000
--- /dev/null
@@ -0,0 +1,653 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 130 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*130);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 107 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*107);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..84a90f5
--- /dev/null
@@ -0,0 +1,1053 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 313 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*313);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 260 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*260);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..2184f12
--- /dev/null
@@ -0,0 +1,735 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 151 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*151);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 128 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*128);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..ca01432
--- /dev/null
@@ -0,0 +1,1143 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 334 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*334);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 281 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*281);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..ca7e9b8
--- /dev/null
@@ -0,0 +1,399 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 44 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*44);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 34 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*34);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..90fb09b
--- /dev/null
@@ -0,0 +1,593 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 108 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*108);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 88 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*88);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..0e3f3d1
--- /dev/null
@@ -0,0 +1,993 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 291 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*291);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 241 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*241);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..6cd4719
--- /dev/null
@@ -0,0 +1,675 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 128 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*128);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 108 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*108);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..5c90acb
--- /dev/null
@@ -0,0 +1,1083 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 311 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*311);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 261 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*261);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomP1P1_c.c
new file mode 100644 (file)
index 0000000..2f74aea
--- /dev/null
@@ -0,0 +1,361 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 32 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 14 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*14 + inneriter*32);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 27 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*27);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW3P1_c.c
new file mode 100644 (file)
index 0000000..ea3ce8b
--- /dev/null
@@ -0,0 +1,555 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 96 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*31 + inneriter*96);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 81 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*30 + inneriter*81);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW3W3_c.c
new file mode 100644 (file)
index 0000000..d8ae0cc
--- /dev/null
@@ -0,0 +1,955 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 279 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*31 + inneriter*279);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 234 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*30 + inneriter*234);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW4P1_c.c
new file mode 100644 (file)
index 0000000..9208f9f
--- /dev/null
@@ -0,0 +1,555 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 96 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*31 + inneriter*96);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 81 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*30 + inneriter*81);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW4W4_c.c
new file mode 100644 (file)
index 0000000..0a198f8
--- /dev/null
@@ -0,0 +1,955 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 279 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*31 + inneriter*279);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 234 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*30 + inneriter*234);
+}
index bcb08bfb585a35f86947a656c9b7ed87bc291c96..25dbb9c0753b9d6482d82a5b7b21bbf9f8c0f213 100644 (file)
@@ -39,6 +39,7 @@
 #include "smalloc.h"
 
 #include "nb_kernel_allvsall.h"
+#include "nrnb.h"
 
 typedef struct 
 {
@@ -231,16 +232,13 @@ setup_aadata(gmx_allvsall_data_t **  p_aadata,
 
 
 void
-nb_kernel_allvsall(t_forcerec *           fr,
-                                  t_mdatoms *            mdatoms,
-                                  t_blocka *             excl,    
-                                  real *                 x,
-                                  real *                 f,
-                                  real *                 Vc,
-                                  real *                 Vvdw,
-                                  int *                  outeriter,
-                                  int *                  inneriter,
-                                  void *                 work)
+nb_kernel_allvsall(t_nblist *                nlist,
+                   rvec *                    xx,
+                   rvec *                    ff,
+                   t_forcerec *              fr,
+                   t_mdatoms *               mdatoms,
+                   nb_kernel_data_t *        kernel_data,
+                   t_nrnb *                  nrnb)
 {
        gmx_allvsall_data_t *aadata;
        int        natoms;
@@ -263,20 +261,30 @@ nb_kernel_allvsall(t_forcerec *           fr,
     real       vcoul,vctot;
     real       c6,c12,Vvdw6,Vvdw12,Vvdwtot;
     real       fscal;
-    
+    t_blocka * excl;
+    real *     f;
+    real *     x;
+    real *     Vvdw;
+    real *     Vc;
+
+    x                   = xx[0];
+    f                   = ff[0];
        charge              = mdatoms->chargeA;
        type                = mdatoms->typeA;
        facel               = fr->epsfac;
     natoms              = mdatoms->nr;
        ni0                 = mdatoms->start;
        ni1                 = mdatoms->start+mdatoms->homenr;
-    
-    aadata = *((gmx_allvsall_data_t **)work);
+    aadata              = fr->AllvsAll_work;
+    excl                = kernel_data->exclusions;
+
+    Vc                  = kernel_data->energygrp_elec;
+    Vvdw                = kernel_data->energygrp_vdw;
 
        if(aadata==NULL)
        {
                setup_aadata(&aadata,excl,natoms,type,fr->ntype,fr->nbfp);
-        *((gmx_allvsall_data_t **)work) = aadata;
+        fr->AllvsAll_work  = aadata;
        }
         
        for(i=ni0; i<ni1; i++)
@@ -430,9 +438,10 @@ nb_kernel_allvsall(t_forcerec *           fr,
                /* Outer loop uses 6 flops/iteration */
        }    
       
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = ni1-ni0;         
-    *inneriter       = (ni1-ni0)*natoms/2;         
+    /* 12 flops per outer iteration
+     * 19 flops per inner iteration
+     */
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,(ni1-ni0)*12 + ((ni1-ni0)*natoms/2)*19);
 }
 
 
index d8ca9d2a046b27e773adaaf7b8aac51b925cf64e..950a316faf6a2c0abf6a74b605b179ab2b97c969 100644 (file)
@@ -8,17 +8,16 @@
 
 #include "types/simple.h"
 #include "typedefs.h"
+#include "../nb_kernel.h"
 
 void
-nb_kernel_allvsall(t_forcerec *           fr,
-                  t_mdatoms *            mdatoms,
-                  t_blocka *             excl,    
-                  real *                 x,
-                  real *                 f,
-                  real *                 Vc,
-                  real *                 Vvdw,
-                  int *                  outeriter,
-                  int *                  inneriter,
-                  void *                 work);
+nb_kernel_allvsall(t_nblist *                nlist,
+                   rvec *                    x,
+                   rvec *                    f,
+                   t_forcerec *              fr,
+                   t_mdatoms *               mdatoms,
+                   nb_kernel_data_t *        kernel_data,
+                   t_nrnb *                  nrnb);
+
 
 #endif
index 809fb8ccce7d746e0f831815bad89633edbf7ada..0709b7fd572fd6e8086c8765bf25fd894c8e5d23 100644 (file)
@@ -39,6 +39,7 @@
 #include "smalloc.h"
 
 #include "nb_kernel_allvsallgb.h"
+#include "nrnb.h"
 
 typedef struct 
 {
@@ -232,17 +233,13 @@ setup_aadata(gmx_allvsall_data_t **  p_aadata,
 
 
 void
-nb_kernel_allvsallgb(t_forcerec *           fr,
-                     t_mdatoms *            mdatoms,
-                     t_blocka *             excl,    
-                     real *                 x,
-                     real *                 f,
-                     real *                 Vc,
-                     real *                 Vvdw,
-                     real *                 vpol,
-                     int *                  outeriter,
-                     int *                  inneriter,
-                     void *                 work)
+nb_kernel_allvsallgb(t_nblist *                nlist,
+                     rvec *                    xx,
+                     rvec *                    ff,
+                     t_forcerec *              fr,
+                     t_mdatoms *               mdatoms,
+                     nb_kernel_data_t *        kernel_data,
+                     t_nrnb *                  nrnb)
 {
        gmx_allvsall_data_t *aadata;
        int        natoms;
@@ -273,26 +270,39 @@ nb_kernel_allvsallgb(t_forcerec *           fr,
     real       fscal,dvdatmp,fijC,vgb;
     real       Y,F,Fp,Geps,Heps2,VV,FF,eps,eps2,r,rt;
     real       dvdaj,gbscale,isaprod,isai,isaj,gbtabscale;
-    
+    real *     f;
+    real *     x;
+    t_blocka * excl;
+    real *     Vvdw;
+    real *     Vc;
+    real *     vpol;
+
+    x                   = xx[0];
+    f                   = ff[0];
        charge              = mdatoms->chargeA;
        type                = mdatoms->typeA;
     gbfactor            = ((1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
        facel               = fr->epsfac;
-    GBtab               = fr->gbtab.tab;
+    GBtab               = fr->gbtab.data;
     gbtabscale          = fr->gbtab.scale;
     invsqrta            = fr->invsqrta;
     dvda                = fr->dvda;
-    
+    vpol                = kernel_data->energygrp_polarization;
+
     natoms              = mdatoms->nr;
        ni0                 = mdatoms->start;
        ni1                 = mdatoms->start+mdatoms->homenr;
     
-    aadata = *((gmx_allvsall_data_t **)work);
+    aadata              = fr->AllvsAll_work;
+    excl                = kernel_data->exclusions;
+
+    Vc                  = kernel_data->energygrp_elec;
+    Vvdw                = kernel_data->energygrp_vdw;
 
        if(aadata==NULL)
        {
                setup_aadata(&aadata,excl,natoms,type,fr->ntype,fr->nbfp);
-        *((gmx_allvsall_data_t **)work) = aadata;
+        fr->AllvsAll_work  = aadata;
        }
 
        for(i=ni0; i<ni1; i++)
@@ -506,9 +516,10 @@ nb_kernel_allvsallgb(t_forcerec *           fr,
                /* Outer loop uses 6 flops/iteration */
        }    
 
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = ni1-ni0;         
-    *inneriter       = (ni1-ni0)*natoms/2;         
+    /* 12 flops per outer iteration
+     * 19 flops per inner iteration
+     */
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,(ni1-ni0)*12 + ((ni1-ni0)*natoms/2)*19);
 }
 
 
index 2661ff7f3f97bf66e91a798164cc174efd392a56..2c3ea9194e034eae55ba64bcc533db6264a3a64a 100644 (file)
@@ -8,18 +8,15 @@
 
 #include "types/simple.h"
 #include "typedefs.h"
+#include "../nb_kernel.h"
 
 void
-nb_kernel_allvsallgb(t_forcerec *           fr,
-                     t_mdatoms *            mdatoms,
-                     t_blocka *             excl,    
-                     real *                 x,
-                     real *                 f,
-                     real *                 Vc,
-                     real *                 Vvdw,
-                     real *                 vpol,
-                     int *                  outeriter,
-                     int *                  inneriter,
-                     void *                 work);
+nb_kernel_allvsallgb(t_nblist *                nlist,
+                     rvec *                    x,
+                     rvec *                    f,
+                     t_forcerec *              fr,
+                     t_mdatoms *               mdatoms,
+                     nb_kernel_data_t *        kernel_data,
+                     t_nrnb *                  nrnb);
 
 #endif
index 500f0f1fe877bc308344fe116a729ceea3b4bce5..c33cadd829e9c5d5cfd2d4a4b0190a48227cdf53 100644 (file)
 /*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
  *                This source code is part of
  *
  *                 G   R   O   M   A   C   S
  *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
+ * Copyright (c) 2001-2012, The GROMACS Development Team
  *
  * Gromacs is a library for molecular simulation and trajectory analysis,
  * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
  * a full list of developers and information, check out http://www.gromacs.org
  *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
  * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
  *
  * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
+ * the papers people have written on it - you can find them on the website.
  */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdio.h>
-
-#include "types/nrnb.h"
-#include "nb_kernel_c.h"
-#include "../nb_kerneltype.h"
+#ifndef nb_kernel_c_h
+#define nb_kernel_c_h
 
+#include "../nb_kernel.h"
 
-/* Include standard kernel headers in local directory */
-#include "nb_kernel010.h"
-#include "nb_kernel020.h"
-#include "nb_kernel030.h"
-#include "nb_kernel100.h"
-#include "nb_kernel101.h"
-#include "nb_kernel102.h"
-#include "nb_kernel103.h"
-#include "nb_kernel104.h"
-#include "nb_kernel110.h"
-#include "nb_kernel111.h"
-#include "nb_kernel112.h"
-#include "nb_kernel113.h"
-#include "nb_kernel114.h"
-#include "nb_kernel120.h"
-#include "nb_kernel121.h"
-#include "nb_kernel122.h"
-#include "nb_kernel123.h"
-#include "nb_kernel124.h"
-#include "nb_kernel130.h"
-#include "nb_kernel131.h"
-#include "nb_kernel132.h"
-#include "nb_kernel133.h"
-#include "nb_kernel134.h"
-#include "nb_kernel200.h"
-#include "nb_kernel201.h"
-#include "nb_kernel202.h"
-#include "nb_kernel203.h"
-#include "nb_kernel204.h"
-#include "nb_kernel210.h"
-#include "nb_kernel211.h"
-#include "nb_kernel212.h"
-#include "nb_kernel213.h"
-#include "nb_kernel214.h"
-#include "nb_kernel220.h"
-#include "nb_kernel221.h"
-#include "nb_kernel222.h"
-#include "nb_kernel223.h"
-#include "nb_kernel224.h"
-#include "nb_kernel230.h"
-#include "nb_kernel231.h"
-#include "nb_kernel232.h"
-#include "nb_kernel233.h"
-#include "nb_kernel234.h"
-#include "nb_kernel300.h"
-#include "nb_kernel301.h"
-#include "nb_kernel302.h"
-#include "nb_kernel303.h"
-#include "nb_kernel304.h"
-#include "nb_kernel310.h"
-#include "nb_kernel311.h"
-#include "nb_kernel312.h"
-#include "nb_kernel313.h"
-#include "nb_kernel314.h"
-#include "nb_kernel320.h"
-#include "nb_kernel321.h"
-#include "nb_kernel322.h"
-#include "nb_kernel323.h"
-#include "nb_kernel324.h"
-#include "nb_kernel330.h"
-#include "nb_kernel331.h"
-#include "nb_kernel332.h"
-#include "nb_kernel333.h"
-#include "nb_kernel334.h"
-#include "nb_kernel400.h"
-#include "nb_kernel410.h"
-#include "nb_kernel420.h"
-#include "nb_kernel430.h"
+nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecNone_VdwBham_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecNone_VdwBham_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c;
 
 
-static nb_kernel_t *
-kernellist[eNR_NBKERNEL_NR] = 
+nb_kernel_info_t
+kernellist_c[] =
 {
-    nb_kernel010,
-    nb_kernel020,
-    nb_kernel030,
-    nb_kernel100,
-    nb_kernel101,
-    nb_kernel102,
-    nb_kernel103,
-    nb_kernel104,
-    nb_kernel110,
-    nb_kernel111,
-    nb_kernel112,
-    nb_kernel113,
-    nb_kernel114,
-    nb_kernel120,
-    nb_kernel121,
-    nb_kernel122,
-    nb_kernel123,
-    nb_kernel124,
-    nb_kernel130,
-    nb_kernel131,
-    nb_kernel132,
-    nb_kernel133,
-    nb_kernel134,
-    nb_kernel200,
-    nb_kernel201,
-    nb_kernel202,
-    nb_kernel203,
-    nb_kernel204,
-    nb_kernel210,
-    nb_kernel211,
-    nb_kernel212,
-    nb_kernel213,
-    nb_kernel214,
-    nb_kernel220,
-    nb_kernel221,
-    nb_kernel222,
-    nb_kernel223,
-    nb_kernel224,
-    nb_kernel230,
-    nb_kernel231,
-    nb_kernel232,
-    nb_kernel233,
-    nb_kernel234,
-    nb_kernel300,
-    nb_kernel301,
-    nb_kernel302,
-    nb_kernel303,
-    nb_kernel304,
-    nb_kernel310,
-    nb_kernel311,
-    nb_kernel312,
-    nb_kernel313,
-    nb_kernel314,
-    nb_kernel320,
-    nb_kernel321,
-    nb_kernel322,
-    nb_kernel323,
-    nb_kernel324,
-    nb_kernel330,
-    nb_kernel331,
-    nb_kernel332,
-    nb_kernel333,
-    nb_kernel334,
-    nb_kernel400,
-    nb_kernel410,
-    nb_kernel430,
-       nb_kernel010nf,
-    nb_kernel020nf,
-    nb_kernel030nf,
-    nb_kernel100nf,
-    nb_kernel101nf,
-    nb_kernel102nf,
-    nb_kernel103nf,
-    nb_kernel104nf,
-    nb_kernel110nf,
-    nb_kernel111nf,
-    nb_kernel112nf,
-    nb_kernel113nf,
-    nb_kernel114nf,
-    nb_kernel120nf,
-    nb_kernel121nf,
-    nb_kernel122nf,
-    nb_kernel123nf,
-    nb_kernel124nf,
-    nb_kernel130nf,
-    nb_kernel131nf,
-    nb_kernel132nf,
-    nb_kernel133nf,
-    nb_kernel134nf,
-    nb_kernel200nf,
-    nb_kernel201nf,
-    nb_kernel202nf,
-    nb_kernel203nf,
-    nb_kernel204nf,
-    nb_kernel210nf,
-    nb_kernel211nf,
-    nb_kernel212nf,
-    nb_kernel213nf,
-    nb_kernel214nf,
-    nb_kernel220nf,
-    nb_kernel221nf,
-    nb_kernel222nf,
-    nb_kernel223nf,
-    nb_kernel224nf,
-    nb_kernel230nf,
-    nb_kernel231nf,
-    nb_kernel232nf,
-    nb_kernel233nf,
-    nb_kernel234nf,
-    nb_kernel300nf,
-    nb_kernel301nf,
-    nb_kernel302nf,
-    nb_kernel303nf,
-    nb_kernel304nf,
-    nb_kernel310nf,
-    nb_kernel311nf,
-    nb_kernel312nf,
-    nb_kernel313nf,
-    nb_kernel314nf,
-    nb_kernel320nf,
-    nb_kernel321nf,
-    nb_kernel322nf,
-    nb_kernel323nf,
-    nb_kernel324nf,
-    nb_kernel330nf,
-    nb_kernel331nf,
-    nb_kernel332nf,
-    nb_kernel333nf,
-    nb_kernel334nf,
-    nb_kernel400nf,
-    nb_kernel410nf,
-    nb_kernel430nf,
+    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_c", "c", "None", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_c", "c", "None", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_c", "c", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_c", "c", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_c", "c", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_c", "c", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_c", "c", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_c", "c", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecNone_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwBham_GeomP1P1_VF_c", "c", "None", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecNone_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwBham_GeomP1P1_F_c", "c", "None", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_VF_c", "c", "None", "None", "Buckingham", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c", "c", "None", "None", "Buckingham", "PotentialShift", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c", "c", "None", "None", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c", "c", "None", "None", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_c", "c", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_c", "c", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_c", "c", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_c", "c", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c", "c", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_c", "c", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_c", "c", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_c", "c", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_c", "c", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_c", "c", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_c", "c", "Ewald", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecEw_VdwNone_GeomP1P1_F_c", "c", "Ewald", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_c", "c", "Ewald", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecEw_VdwNone_GeomW3P1_F_c", "c", "Ewald", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_c", "c", "Ewald", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecEw_VdwNone_GeomW3W3_F_c", "c", "Ewald", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_c", "c", "Ewald", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecEw_VdwNone_GeomW4P1_F_c", "c", "Ewald", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_c", "c", "Ewald", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecEw_VdwNone_GeomW4W4_F_c", "c", "Ewald", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_c", "c", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_c", "c", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEw_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecEw_VdwBham_GeomP1P1_VF_c", "c", "Ewald", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecEw_VdwBham_GeomP1P1_F_c", "c", "Ewald", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEw_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecEw_VdwBham_GeomW3P1_VF_c", "c", "Ewald", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecEw_VdwBham_GeomW3P1_F_c", "c", "Ewald", "None", "Buckingham", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEw_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecEw_VdwBham_GeomW3W3_VF_c", "c", "Ewald", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecEw_VdwBham_GeomW3W3_F_c", "c", "Ewald", "None", "Buckingham", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEw_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecEw_VdwBham_GeomW4P1_VF_c", "c", "Ewald", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecEw_VdwBham_GeomW4P1_F_c", "c", "Ewald", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEw_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecEw_VdwBham_GeomW4W4_VF_c", "c", "Ewald", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecEw_VdwBham_GeomW4W4_F_c", "c", "Ewald", "None", "Buckingham", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_c", "c", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_c", "c", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_c", "c", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_c", "c", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_c", "c", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_c", "c", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_c", "c", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_c", "c", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_c", "c", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_c", "c", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_VF_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_VF_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_F_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_F_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_VF_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_VF_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_F_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_F_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_VF_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_VF_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_F_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_F_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_VF_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_VF_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_F_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_F_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_VF_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_VF_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_F_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_F_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_c", "c", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_c", "c", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c", "c", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c", "c", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c", "c", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c", "c", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c", "c", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c", "c", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c", "c", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c", "c", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c", "c", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c", "c", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_c", "c", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_c", "c", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_c", "c", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_c", "c", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_c", "c", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_c", "c", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_c", "c", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_c", "c", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_c", "c", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_c", "c", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_c", "c", "ReactionField", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecRF_VdwNone_GeomP1P1_F_c", "c", "ReactionField", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_c", "c", "ReactionField", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecRF_VdwNone_GeomW3P1_F_c", "c", "ReactionField", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_c", "c", "ReactionField", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecRF_VdwNone_GeomW3W3_F_c", "c", "ReactionField", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_c", "c", "ReactionField", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecRF_VdwNone_GeomW4P1_F_c", "c", "ReactionField", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_c", "c", "ReactionField", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecRF_VdwNone_GeomW4W4_F_c", "c", "ReactionField", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRF_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecRF_VdwBham_GeomP1P1_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecRF_VdwBham_GeomP1P1_F_c", "c", "ReactionField", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRF_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecRF_VdwBham_GeomW3P1_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecRF_VdwBham_GeomW3P1_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRF_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecRF_VdwBham_GeomW3W3_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecRF_VdwBham_GeomW3W3_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Water4", "", "Force" }
 };
 
+int
+kernellist_c_size = sizeof(kernellist_c)/sizeof(kernellist_c[0]);
 
-void
-nb_kernel_setup(FILE *log,nb_kernel_t **list)
-{
-  int i;
-  nb_kernel_t *p;
-
-  if(NULL != log)
-    fprintf(log,"Configuring standard C nonbonded kernels...\n");
-
-  for(i=0;i<eNR_NBKERNEL_NR;i++)
-  {
-    p = kernellist[i];
-    if(p!=NULL)
-      list[i] = p; 
-  }
-}    
+#endif
index c24b95263bb19074540ed5d251b5cce075315c50..8aacf2bb70f9db39f90b34a959d4ed44206bdc54 100644 (file)
@@ -1,49 +1,36 @@
 /*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
  *                This source code is part of
  *
  *                 G   R   O   M   A   C   S
  *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
+ * Copyright (c) 2001-2012, The GROMACS Development Team
  *
  * Gromacs is a library for molecular simulation and trajectory analysis,
  * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
  * a full list of developers and information, check out http://www.gromacs.org
  *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
  * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
  *
  * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL_H_
-#define _NBKERNEL_H_
-
-/** \file
- * \brief The vanilla nonbonded Gromacs kernels (portable, written in C).
- *
- * \internal
+ * the papers people have written on it - you can find them on the website.
  */
+#ifndef nb_kernel_c_h
+#define nb_kernel_c_h
 
-#include <stdio.h>
+#include "../nb_kernel.h"
 
-#include "../nb_kerneltype.h"
-#include "nb_kernel_allvsall.h"
-#include "nb_kernel_allvsallgb.h"
 
-void
-nb_kernel_setup(FILE *fplog,nb_kernel_t **list);
+/* List of kernels for this architecture with metadata about them */
+extern nb_kernel_info_t
+kernellist_c[];
 
-#endif /* _NBKERNEL_H_ */
+/* Length of kernellist_c */
+extern int
+kernellist_c_size;
 
+#endif
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_template_c.pre b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_template_c.pre
new file mode 100644 (file)
index 0000000..275d0e6
--- /dev/null
@@ -0,0 +1,844 @@
+/* #if 0 */
+#error This file must be processed with the Gromacs pre-preprocessor
+/* #endif */
+/* #if INCLUDE_HEADER */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+/* #endif */
+
+/* ## List of variables set by the generating script:                                    */
+/* ##                                                                                    */
+/* ## Setttings that apply to the entire kernel:                                         */
+/* ## KERNEL_ELEC:           String, choice for electrostatic interactions               */
+/* ## KERNEL_VDW:            String, choice for van der Waals interactions               */
+/* ## KERNEL_NAME:           String, name of this kernel                                 */
+/* ## KERNEL_VF:             String telling if we calculate potential, force, or both    */
+/* ## GEOMETRY_I/GEOMETRY_J: String, name of each geometry, e.g. 'Water3' or '1Particle' */
+/* ##                                                                                    */
+/* ## Setttings that apply to particles in the outer (I) or inner (J) loops:             */
+/* ## PARTICLES_I[]/         Arrays with lists of i/j particles to use in kernel. It is  */
+/* ## PARTICLES_J[]:         just [0] for particle geometry, but can be longer for water */
+/* ## PARTICLES_ELEC_I[]/    Arrays with lists of i/j particle that have electrostatics  */
+/* ## PARTICLES_ELEC_J[]:    interactions that should be calculated in this kernel.      */
+/* ## PARTICLES_VDW_I[]/     Arrays with the list of i/j particle that have VdW          */
+/* ## PARTICLES_VDW_J[]:     interactions that should be calculated in this kernel.      */
+/* ##                                                                                    */
+/* ## Setttings for pairs of interactions (e.g. 2nd i particle against 1st j particle)   */
+/* ## PAIRS_IJ[]:            Array with (i,j) tuples of pairs for which interactions     */
+/* ##                        should be calculated in this kernel. Zero-charge particles  */
+/* ##                        do not have interactions with particles without vdw, and    */
+/* ##                        Vdw-only interactions are not evaluated in a no-vdw-kernel. */
+/* ## INTERACTION_FLAGS[][]: 2D matrix, dimension e.g. 3*3 for water-water interactions. */
+/* ##                        For each i-j pair, the element [I][J] is a list of strings  */
+/* ##                        defining properties/flags of this interaction. Examples     */
+/* ##                        include 'electrostatics'/'vdw' if that type of interaction  */
+/* ##                        should be evaluated, 'rsq'/'rinv'/'rinvsq' if those values  */
+/* ##                        are needed, and 'exactcutoff' or 'shift','switch' to        */
+/* ##                        decide if the force/potential should be modified. This way  */
+/* ##                        we only calculate values absolutely needed for each case.   */
+
+/* ## Calculate the size and offset for (merged/interleaved) table data */
+
+/* #if ('CubicSplineTable' in [KERNEL_ELEC,KERNEL_VDW]) or KERNEL_VF=='PotentialAndForce' */
+/*     #define TABLE_POINT_SIZE 4 */
+/* #else */
+/*     #define TABLE_POINT_SIZE 2 */
+/* #endif */
+
+/* #if 'Table' in KERNEL_ELEC and 'Table' in KERNEL_VDW */
+/*     #define TABLE_NINTERACTIONS 3 */
+/*     #define TABLE_VDW_OFFSET TABLE_POINT_SIZE */
+/* #elif 'Table' in KERNEL_ELEC */
+/*     #define TABLE_NINTERACTIONS 1 */
+/* #elif 'Table' in KERNEL_VDW */
+/*     #define TABLE_NINTERACTIONS 2 */
+/*     #define TABLE_VDW_OFFSET 0 */
+/* #else */
+/*     #define TABLE_NINTERACTIONS 0 */
+/* #endif */
+
+/* #if 'Buckingham' in KERNEL_VDW */
+/*   #define NVDWPARAM 3 */
+/* #else */
+/*   #define NVDWPARAM 2 */
+/* #endif */
+
+/*
+ * Gromacs nonbonded kernel:   {KERNEL_NAME}
+ * Electrostatics interaction: {KERNEL_ELEC}
+ * VdW interaction:            {KERNEL_VDW}
+ * Geometry:                   {GEOMETRY_I}-{GEOMETRY_J}
+ * Calculate force/pot:        {KERNEL_VF}
+ */
+void
+{KERNEL_NAME}
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    /* ## Not all variables are used for all kernels, but any optimizing compiler fixes that, */
+    /* ## so there is no point in going to extremes to exclude variables that are not needed. */
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    /* #for I in PARTICLES_I */
+    int              vdwioffset{I};
+    real             ix{I},iy{I},iz{I},fix{I},fiy{I},fiz{I},iq{I},isai{I};
+    /* #endfor */
+    /* #for J in PARTICLES_J */
+    int              vdwjidx{J};
+    real             jx{J},jy{J},jz{J},fjx{J},fjy{J},fjz{J},jq{J},isaj{J};
+    /* #endfor */
+    /* #for I,J in PAIRS_IJ */
+    real             dx{I}{J},dy{I}{J},dz{I}{J},rsq{I}{J},rinv{I}{J},rinvsq{I}{J},r{I}{J},qq{I}{J},c6_{I}{J},c12_{I}{J},cexp1_{I}{J},cexp2_{I}{J};
+    /* #endfor */
+    /* #if KERNEL_ELEC != 'None' */
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    /* #endif */
+    /* #if 'GeneralizedBorn' in KERNEL_ELEC */
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    /* #endif */
+    /* #if KERNEL_VDW != 'None' */
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    /* #endif */
+    /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    /* #endif */
+    /* #if 'Ewald' in KERNEL_ELEC */
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    /* #endif */
+    /* #if 'PotentialSwitch' in [KERNEL_MOD_ELEC,KERNEL_MOD_VDW] */
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+    /* #endif */
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    /* #if KERNEL_ELEC != 'None' */
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    /*     #if 'ReactionField' in KERNEL_ELEC */
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    /*     #endif */
+    /* #endif */
+    /* #if KERNEL_VDW != 'None' */
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+    /* #endif */
+
+    /* #if 'Table' in KERNEL_ELEC and 'Table' in KERNEL_VDW */
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+    /* #elif 'Table' in KERNEL_ELEC */
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+    /* #elif 'Table' in KERNEL_VDW */
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+    /* #endif */
+
+    /* #if 'Ewald' in KERNEL_ELEC */
+    sh_ewald         = fr->ic->sh_ewald;
+    /*     #if KERNEL_VF=='Force'  */
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+    /*     #else */
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+     /*     #endif */
+    /* #endif */
+
+    /* #if KERNEL_ELEC=='GeneralizedBorn' */
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+    /* #endif */
+
+    /* #if 'Water' in GEOMETRY_I */
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    /*     #for I in PARTICLES_ELEC_I */
+    iq{I}              = facel*charge[inr+{I}];
+    /*     #endfor */
+    /*     #for I in PARTICLES_VDW_I */
+    vdwioffset{I}      = {NVDWPARAM}*nvdwtype*vdwtype[inr+{I}];
+    /*     #endfor */
+    /* #endif */
+
+    /* #if 'Water' in GEOMETRY_J */
+    /*     #for J in PARTICLES_ELEC_J */
+    jq{J}              = charge[inr+{J}];
+    /*     #endfor */
+    /*     #for J in PARTICLES_VDW_J */
+    vdwjidx{J}         = {NVDWPARAM}*vdwtype[inr+{J}];
+    /*     #endfor */
+    /*     #for I,J in PAIRS_IJ */
+    /*         #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
+    qq{I}{J}             = iq{I}*jq{J};
+    /*         #endif */
+    /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
+    /*             #if 'Buckingham' in KERNEL_VDW */
+    c6_{I}{J}            = vdwparam[vdwioffset{I}+vdwjidx{J}];
+    cexp1_{I}{J}         = vdwparam[vdwioffset{I}+vdwjidx{J}+1];
+    cexp2_{I}{J}         = vdwparam[vdwioffset{I}+vdwjidx{J}+2];
+    /*             #else */
+    c6_{I}{J}            = vdwparam[vdwioffset{I}+vdwjidx{J}];
+    c12_{I}{J}           = vdwparam[vdwioffset{I}+vdwjidx{J}+1];
+    /*             #endif */
+    /*         #endif */
+    /*     #endfor */
+    /* #endif */
+
+    /* #if KERNEL_MOD_ELEC!='None' or KERNEL_MOD_VDW!='None' */
+    /*     #if KERNEL_ELEC!='None' */
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    /*     #else */
+    rcutoff          = fr->rvdw;
+    /*     #endif */
+    rcutoff2         = rcutoff*rcutoff;
+    /* #endif */
+
+    /* #if KERNEL_MOD_VDW=='PotentialShift' */
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+    /* #endif */
+
+    /* #if 'PotentialSwitch' in [KERNEL_MOD_ELEC,KERNEL_MOD_VDW] */
+    /*     #if KERNEL_MOD_ELEC=='PotentialSwitch'  */
+    rswitch          = fr->rcoulomb_switch;
+    /*     #else */
+    rswitch          = fr->rvdw_switch;
+    /*     #endif */
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    /*     #if 'Force' in KERNEL_VF */
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+    /*     #endif */
+    /* #endif */
+
+    /* ## Keep track of the floating point operations we issue for reporting! */
+    /* #define OUTERFLOPS 0 */
+    /* #define INNERFLOPS 0 */
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        /* ## Loop over i particles, but only include ones that we use - skip e.g. vdw-only sites for elec-only kernel */
+        /*     #for I in PARTICLES_I */
+        ix{I}              = shX + x[i_coord_offset+DIM*{I}+XX];
+        iy{I}              = shY + x[i_coord_offset+DIM*{I}+YY];
+        iz{I}              = shZ + x[i_coord_offset+DIM*{I}+ZZ];
+        /*     #define OUTERFLOPS OUTERFLOPS+3 */
+        /* #endfor */
+
+        /* #if 'Force' in KERNEL_VF */
+        /*     #for I in PARTICLES_I */
+        fix{I}             = 0.0;
+        fiy{I}             = 0.0;
+        fiz{I}             = 0.0;
+        /*     #endfor */
+        /* #endif */
+
+        /* ## For water we already preloaded parameters at the start of the kernel */
+        /* #if not 'Water' in GEOMETRY_I */
+        /* Load parameters for i particles */
+        /*     #for I in PARTICLES_ELEC_I */
+        iq{I}              = facel*charge[inr+{I}];
+        /*         #define OUTERFLOPS OUTERFLOPS+1 */
+        /*         #if KERNEL_ELEC=='GeneralizedBorn' */
+        isai{I}            = invsqrta[inr+{I}];
+        /*         #endif */
+        /*     #endfor */
+        /*     #for I in PARTICLES_VDW_I */
+        vdwioffset{I}      = {NVDWPARAM}*nvdwtype*vdwtype[inr+{I}];
+        /*     #endfor */
+        /* #endif */
+
+        /* #if 'Potential' in KERNEL_VF */
+        /* Reset potential sums */
+        /*     #if KERNEL_ELEC != 'None' */
+        velecsum         = 0.0;
+        /*     #endif */
+        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
+        vgbsum           = 0.0;
+        /*     #endif */
+        /*     #if KERNEL_VDW != 'None' */
+        vvdwsum          = 0.0;
+        /*     #endif */
+        /* #endif */
+        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
+        dvdasum          = 0.0;
+        /*     #endif */
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            /* #for J in PARTICLES_J */
+            jx{J}              = x[j_coord_offset+DIM*{J}+XX];
+            jy{J}              = x[j_coord_offset+DIM*{J}+YY];
+            jz{J}              = x[j_coord_offset+DIM*{J}+ZZ];
+            /* #endfor */
+
+            /* Calculate displacement vector */
+            /* #for I,J in PAIRS_IJ */
+            dx{I}{J}             = ix{I} - jx{J};
+            dy{I}{J}             = iy{I} - jy{J};
+            dz{I}{J}             = iz{I} - jz{J};
+            /*     #define INNERFLOPS INNERFLOPS+3 */
+            /* #endfor */
+
+            /* Calculate squared distance and things based on it */
+            /* #for I,J in PAIRS_IJ */
+            rsq{I}{J}            = dx{I}{J}*dx{I}{J}+dy{I}{J}*dy{I}{J}+dz{I}{J}*dz{I}{J};
+            /*     #define INNERFLOPS INNERFLOPS+5 */
+            /* #endfor */
+
+            /* #for I,J in PAIRS_IJ */
+            /*     #if 'rinv' in INTERACTION_FLAGS[I][J] */
+            rinv{I}{J}           = gmx_invsqrt(rsq{I}{J});
+            /*         #define INNERFLOPS INNERFLOPS+5 */
+            /*     #endif */
+            /* #endfor */
+
+            /* #for I,J in PAIRS_IJ */
+            /*     #if 'rinvsq' in INTERACTION_FLAGS[I][J] */
+            /*         # if 'rinv' not in INTERACTION_FLAGS[I][J] */
+            rinvsq{I}{J}         = 1.0/rsq{I}{J};
+            /*             #define INNERFLOPS INNERFLOPS+4 */
+            /*         #else */
+            rinvsq{I}{J}         = rinv{I}{J}*rinv{I}{J};
+            /*             #define INNERFLOPS INNERFLOPS+1 */
+            /*         #endif */
+            /*     #endif */
+            /* #endfor */
+
+            /* #if not 'Water' in GEOMETRY_J */
+            /* Load parameters for j particles */
+            /*     #for J in PARTICLES_ELEC_J */
+            jq{J}              = charge[jnr+{J}];
+            /*         #if KERNEL_ELEC=='GeneralizedBorn' */
+            isaj{J}           = invsqrta[jnr+{J}];
+            /*         #endif */
+            /*     #endfor */
+            /*     #for J in PARTICLES_VDW_J */
+            vdwjidx{J}         = {NVDWPARAM}*vdwtype[jnr+{J}];
+            /*     #endfor */
+            /* #endif */
+
+            /* #for I,J in PAIRS_IJ */
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /*     #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
+            /*         ## We always calculate rinv/rinvsq above to enable pipelineing in compilers (performance tested on x86) */
+            if (rsq{I}{J}<rcutoff2)
+            {
+                /*     #if 0    ## this and the next two lines is a hack to maintain auto-indentation in template file */
+            }
+            /*         #endif */
+            /*     #endif */
+
+            /*     #if 'r' in INTERACTION_FLAGS[I][J] */
+            r{I}{J}              = rsq{I}{J}*rinv{I}{J};
+            /*         #define INNERFLOPS INNERFLOPS+1 */
+            /*     #endif */
+
+            /*     ## For water geometries we already loaded parameters at the start of the kernel */
+            /*     #if not 'Water' in GEOMETRY_J */
+            /*         #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
+            qq{I}{J}             = iq{I}*jq{J};
+            /*             #define INNERFLOPS INNERFLOPS+1 */
+            /*         #endif */
+            /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
+            /*             #if KERNEL_VDW=='Buckingham' */
+            c6_{I}{J}            = vdwparam[vdwioffset{I}+vdwjidx{J}];
+            cexp1_{I}{J}         = vdwparam[vdwioffset{I}+vdwjidx{J}+1];
+            cexp2_{I}{J}         = vdwparam[vdwioffset{I}+vdwjidx{J}+2];
+            /*             #else */
+            c6_{I}{J}            = vdwparam[vdwioffset{I}+vdwjidx{J}];
+            c12_{I}{J}           = vdwparam[vdwioffset{I}+vdwjidx{J}+1];
+            /*             #endif */
+            /*         #endif */
+            /*     #endif */
+
+            /*     #if 'table' in INTERACTION_FLAGS[I][J] */
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r{I}{J}*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = {TABLE_NINTERACTIONS}*{TABLE_POINT_SIZE}*vfitab;
+            /*         #define INNERFLOPS INNERFLOPS+2 */
+            /*     #endif */
+
+            /*     ## ELECTROSTATIC INTERACTIONS */
+            /*     #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
+
+            /*         #if KERNEL_ELEC=='Coulomb' */
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq{I}{J}*rinv{I}{J};
+            /*             #define INNERFLOPS INNERFLOPS+1 */
+            /*             #if 'Force' in KERNEL_VF */
+            felec            = velec*rinvsq{I}{J};
+            /*                 #define INNERFLOPS INNERFLOPS+2 */
+            /*             #endif */
+
+            /*         #elif KERNEL_ELEC=='ReactionField' */
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            /*             #if 'Potential' in KERNEL_VF */
+            velec            = qq{I}{J}*(rinv{I}{J}+krf*rsq{I}{J}-crf);
+            /*                 #define INNERFLOPS INNERFLOPS+4 */
+            /*             #endif */
+            /*             #if 'Force' in KERNEL_VF */
+            felec            = qq{I}{J}*(rinv{I}{J}*rinvsq{I}{J}-krf2);
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*             #endif */
+
+            /*         #elif KERNEL_ELEC=='GeneralizedBorn' */
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai{I}*isaj{J};
+            gbqqfactor       = isaprod*(-qq{I}{J})*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+{J}];
+            /*             #define INNERFLOPS INNERFLOPS+5 */
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r{I}{J}*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+            /*             #define INNERFLOPS INNERFLOPS+10 */
+
+            /*             #if 'Force' in KERNEL_VF */
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r{I}{J});
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj{J}*isaj{J};
+            /*                 #define INNERFLOPS INNERFLOPS+13 */
+            /*             #endif */
+            velec            = qq{I}{J}*rinv{I}{J};
+            /*                 #define INNERFLOPS INNERFLOPS+1 */
+            /*             #if 'Force' in KERNEL_VF */
+            felec            = (velec*rinv{I}{J}-fgb)*rinv{I}{J};
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*             #endif */
+
+            /*         #elif KERNEL_ELEC=='Ewald' */
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r{I}{J}*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            /*             #define INNERFLOPS INNERFLOPS+2 */
+            /*             #if 'Potential' in KERNEL_VF or KERNEL_MOD_ELEC=='PotentialSwitch' */
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            /*                 #define INNERFLOPS INNERFLOPS+4 */
+            /*                 #if KERNEL_MOD_ELEC=='PotentialShift' */
+            velec            = qq{I}{J}*((rinv{I}{J}-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            /*                     #define INNERFLOPS INNERFLOPS+7 */
+            /*                 #else */
+            velec            = qq{I}{J}*(rinv{I}{J}-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            /*                     #define INNERFLOPS INNERFLOPS+6 */
+            /*                 #endif */
+            /*                 #if 'Force' in KERNEL_VF */
+            felec            = qq{I}{J}*rinv{I}{J}*(rinvsq{I}{J}-felec);
+            /*                      #define INNERFLOPS INNERFLOPS+3 */
+            /*                 #endif */
+            /*             #elif KERNEL_VF=='Force' */
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq{I}{J}*rinv{I}{J}*(rinvsq{I}{J}-felec);
+            /*                 #define INNERFLOPS INNERFLOPS+7 */
+            /*             #endif */
+
+            /*         #elif KERNEL_ELEC=='CubicSplineTable' */
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            /*             #if 'Force' in KERNEL_VF */
+            Y                = vftab[vfitab];
+            /*             #endif */
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            /*             #define INNERFLOPS INNERFLOPS+5 */
+            /*             #if 'Potential' in KERNEL_VF */
+            VV               = Y+vfeps*Fp;
+            velec            = qq{I}{J}*VV;
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*             #endif */
+            /*             #if 'Force' in KERNEL_VF */
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq{I}{J}*FF*vftabscale*rinv{I}{J};
+            /*                 #define INNERFLOPS INNERFLOPS+7 */
+            /*             #endif */
+            /*         #endif */
+            /*         ## End of check for electrostatics interaction forms */
+            /*     #endif */
+            /*     ## END OF ELECTROSTATIC INTERACTION CHECK FOR PAIR I-J */
+
+            /*     #if 'vdw' in INTERACTION_FLAGS[I][J] */
+
+            /*         #if KERNEL_VDW=='LennardJones' */
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq{I}{J}*rinvsq{I}{J}*rinvsq{I}{J};
+            /*             #define INNERFLOPS INNERFLOPS+2 */
+            /*             #if 'Potential' in KERNEL_VF or KERNEL_MOD_VDW=='PotentialSwitch' */
+            vvdw6            = c6_{I}{J}*rinvsix;
+            vvdw12           = c12_{I}{J}*rinvsix*rinvsix;
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*                 #if KERNEL_MOD_VDW=='PotentialShift' */
+            vvdw             = (vvdw12 - c12_{I}{J}*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_{I}{J}*sh_vdw_invrcut6)*(1.0/6.0);
+            /*                     #define INNERFLOPS INNERFLOPS+8 */
+            /*                 #else */
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            /*                     #define INNERFLOPS INNERFLOPS+3 */
+            /*                 #endif */
+            /*                 ## Check for force inside potential check, i.e. this means we already did the potential part */
+            /*                 #if 'Force' in KERNEL_VF */
+            fvdw             = (vvdw12-vvdw6)*rinvsq{I}{J};
+            /*                     #define INNERFLOPS INNERFLOPS+2 */
+            /*                 #endif */
+            /*             #elif KERNEL_VF=='Force' */
+            /*                 ## Force-only LennardJones makes it possible to save 1 flop (they do add up...) */
+            fvdw             = (c12_{I}{J}*rinvsix-c6_{I}{J})*rinvsix*rinvsq{I}{J};
+            /*                 #define INNERFLOPS INNERFLOPS+4 */
+            /*             #endif */
+
+            /*         #elif KERNEL_VDW=='Buckingham' */
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq{I}{J}*rinvsq{I}{J}*rinvsq{I}{J};
+            vvdw6            = c6_{I}{J}*rinvsix;
+            br               = cexp2_{I}{J}*r{I}{J};
+            vvdwexp          = cexp1_{I}{J}*exp(-br);
+            /*             ## Estimate exp() to 25 flops */
+            /*             #define INNERFLOPS INNERFLOPS+31 */
+            /*             #if 'Potential' in KERNEL_VF or KERNEL_MOD_VDW=='PotentialSwitch'  */
+            /*                 #if KERNEL_MOD_VDW=='PotentialShift' */
+            vvdw             = (vvdwexp-cexp1_{I}{J}*exp(-cexp2_{I}{J}*rvdw)) - (vvdw6 - c6_{I}{J}*sh_vdw_invrcut6)*(1.0/6.0);
+            /*                     #define INNERFLOPS INNERFLOPS+33 */
+            /*                  #else */
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            /*                     #define INNERFLOPS INNERFLOPS+2 */
+            /*                 #endif */
+            /*             #endif */
+            /*             #if 'Force' in KERNEL_VF */
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq{I}{J};
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*             #endif */
+
+            /*         #elif KERNEL_VDW=='CubicSplineTable' */
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += {TABLE_VDW_OFFSET};
+            /*             #if 'Force' in KERNEL_VF */
+            Y                = vftab[vfitab];
+            /*             #endif */
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            /*             #define INNERFLOPS INNERFLOPS+5 */
+            /*             #if 'Potential' in KERNEL_VF */
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_{I}{J}*VV;
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*             #endif */
+            /*             #if 'Force' in KERNEL_VF */
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_{I}{J}*FF;
+            /*                 #define INNERFLOPS INNERFLOPS+4 */
+            /*             #endif */
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            /*             #if 'Force' in KERNEL_VF */
+            Y                = vftab[vfitab+4];
+            /*             #endif */
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            /*             #define INNERFLOPS INNERFLOPS+5 */
+            /*             #if 'Potential' in KERNEL_VF */
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_{I}{J}*VV;
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*             #endif */
+            /*             #if 'Force' in KERNEL_VF */
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_{I}{J}*FF;
+            /*                 #define INNERFLOPS INNERFLOPS+4 */
+            /*             #endif */
+            /*             #if 'Potential' in KERNEL_VF */
+            vvdw             = vvdw12+vvdw6;
+            /*                 #define INNERFLOPS INNERFLOPS+1 */
+            /*             #endif */
+            /*             #if 'Force' in KERNEL_VF */
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv{I}{J};
+            /*                 #define INNERFLOPS INNERFLOPS+4 */
+            /*             #endif */
+            /*         #endif */
+            /*         ## End of check for vdw interaction forms */
+            /*     #endif */
+            /*     ## END OF VDW INTERACTION CHECK FOR PAIR I-J */
+
+            /*     #if 'switch' in INTERACTION_FLAGS[I][J] */
+            d                = r{I}{J}-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+            /*         #define INNERFLOPS INNERFLOPS+9 */
+
+            /*         #if 'Force' in KERNEL_VF */
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+            /*             #define INNERFLOPS INNERFLOPS+5 */
+            /*         #endif */
+
+            /* Evaluate switch function */
+            /*         #if 'Potential' in KERNEL_VF */
+            /*             #if 'electrostatics' in INTERACTION_FLAGS[I][J] and KERNEL_MOD_ELEC=='PotentialSwitch' */
+            velec           *= sw;
+            /*                 #define INNERFLOPS INNERFLOPS+1 */
+            /*             #endif */
+            /*             #if 'vdw' in INTERACTION_FLAGS[I][J] and KERNEL_MOD_VDW=='PotentialSwitch' */
+            vvdw            *= sw;
+            /*                 #define INNERFLOPS INNERFLOPS+1 */
+            /*             #endif */
+            /*         #endif */
+            /*         #if 'Force' in KERNEL_VF */
+            /*             #if 'electrostatics' in INTERACTION_FLAGS[I][J] and KERNEL_MOD_ELEC=='PotentialSwitch' */
+            felec            = felec*sw + velec*dsw;
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*             #endif */
+            /*             #if 'vdw' in INTERACTION_FLAGS[I][J] and KERNEL_MOD_VDW=='PotentialSwitch' */
+            fvdw             = fvdw*sw + vvdw*dsw;
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*             #endif */
+            /*         #endif */
+            /*     #endif */
+
+            /*     #if 'Potential' in KERNEL_VF */
+            /* Update potential sums from outer loop */
+            /*         #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
+            velecsum        += velec;
+            /*             #define INNERFLOPS INNERFLOPS+1 */
+            /*             #if KERNEL_ELEC=='GeneralizedBorn' */
+            vgbsum          += vgb;
+            /*                 #define INNERFLOPS INNERFLOPS+1 */
+            /*             #endif */
+            /*         #endif */
+            /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
+            vvdwsum         += vvdw;
+            /*             #define INNERFLOPS INNERFLOPS+1 */
+            /*         #endif */
+            /*     #endif */
+
+            /*     #if 'Force' in KERNEL_VF */
+
+            /*         #if 'electrostatics' in INTERACTION_FLAGS[I][J] and 'vdw' in INTERACTION_FLAGS[I][J] */
+            fscal            = felec+fvdw;
+            /*             #define INNERFLOPS INNERFLOPS+1 */
+            /*         #elif 'electrostatics' in INTERACTION_FLAGS[I][J] */
+            fscal            = felec;
+            /*         #elif 'vdw' in INTERACTION_FLAGS[I][J] */
+            fscal            = fvdw;
+            /*        #endif */
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx{I}{J};
+            ty               = fscal*dy{I}{J};
+            tz               = fscal*dz{I}{J};
+
+            /* Update vectorial force */
+            fix{I}            += tx;
+            fiy{I}            += ty;
+            fiz{I}            += tz;
+            f[j_coord_offset+DIM*{J}+XX] -= tx;
+            f[j_coord_offset+DIM*{J}+YY] -= ty;
+            f[j_coord_offset+DIM*{J}+ZZ] -= tz;
+
+            /*         #define INNERFLOPS INNERFLOPS+9 */
+            /*     #endif */
+
+            /*     #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
+            /*         #if 0    ## This and next two lines is a hack to maintain indentation in template file */
+            {
+                /*     #endif */
+            }
+            /*     #endif */
+            /*    ## End of check for the interaction being outside the cutoff */
+
+            /* #endfor */
+            /* ## End of loop over i-j interaction pairs */
+
+            /* Inner loop uses {INNERFLOPS} flops */
+        }
+        /* End of innermost loop */
+
+        /* #if 'Force' in KERNEL_VF */
+        tx = ty = tz = 0;
+        /*     #for I in PARTICLES_I */
+        f[i_coord_offset+DIM*{I}+XX] += fix{I};
+        f[i_coord_offset+DIM*{I}+YY] += fiy{I};
+        f[i_coord_offset+DIM*{I}+ZZ] += fiz{I};
+        tx                         += fix{I};
+        ty                         += fiy{I};
+        tz                         += fiz{I};
+        /*         #define OUTERFLOPS OUTERFLOPS+6 */
+        /*     #endfor */
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+        /*     #define OUTERFLOPS OUTERFLOPS+3 */
+        /* #endif */
+
+        /* #if 'Potential' in KERNEL_VF */
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        /*     #if KERNEL_ELEC != 'None' */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        /*         #define OUTERFLOPS OUTERFLOPS+1 */
+        /*     #endif */
+        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
+        kernel_data->energygrp_polarization[ggid] += vgbsum;
+        /*         #define OUTERFLOPS OUTERFLOPS+1 */
+        /*     #endif */
+        /*     #if KERNEL_VDW != 'None' */
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+        /*         #define OUTERFLOPS OUTERFLOPS+1 */
+        /*     #endif */
+        /* #endif */
+        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
+        dvda[nri]                   = dvda[nri] + dvdasum*isai{I}*isai{I};
+        /*     #endif */
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses {OUTERFLOPS} flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+    /* ## NB: This is not important, it just affects the flopcount. However, since our preprocessor is */
+    /* ## primitive and replaces aggressively even in strings inside these directives, we need to      */
+    /* ## assemble the main part of the name (containing KERNEL/ELEC/VDW) directly in the source.      */
+    /* #if GEOMETRY_I == 'Water3'            */
+    /*     #define ISUFFIX '_W3'             */
+    /* #elif GEOMETRY_I == 'Water4'          */
+    /*     #define ISUFFIX '_W4'             */
+    /* #else                                 */
+    /*     #define ISUFFIX ''                */
+    /* #endif                                */
+    /* #if GEOMETRY_J == 'Water3'            */
+    /*     #define JSUFFIX 'W3'              */
+    /* #elif GEOMETRY_J == 'Water4'          */
+    /*     #define JSUFFIX 'W4'              */
+    /* #else                                 */
+    /*     #define JSUFFIX ''                */
+    /* #endif                                */
+    /* #if 'PotentialAndForce' in KERNEL_VF  */
+    /*     #define VFSUFFIX  '_VF'           */
+    /* #elif 'Potential' in KERNEL_VF        */
+    /*     #define VFSUFFIX '_V'             */
+    /* #else                                 */
+    /*     #define VFSUFFIX '_F'             */
+    /* #endif                                */
+
+    /* #if KERNEL_ELEC != 'None' and KERNEL_VDW != 'None' */
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS});
+    /* #elif KERNEL_ELEC != 'None' */
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS});
+    /* #else */
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS});
+    /* #endif  */
+}
index 686b57dfd387a81c8b696082f59b3880e4fa6961..f36f988f8bb552cffcb8bd00bbb51c631df804e0 100644 (file)
@@ -35,7 +35,7 @@
 
 #include "types/nrnb.h"
 #include "nb_kernel_f77_double.h"
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 
 
 /* Include standard kernel headers in local directory */
index 1ab0761f808fe77b3c90335a978c03b51edc93c7..18a5f3ec268df13685da5ea44d92b75c91840874 100644 (file)
@@ -39,7 +39,7 @@
 
 #include <stdio.h>
 
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 
 void
 nb_kernel_setup_f77_double(FILE *fplog,nb_kernel_t **list);
index 35edd9b9ed6627707bc69a65ab96e389ae8ba78b..493873195d1c434fab767df8cd80285809fdeda7 100644 (file)
@@ -35,7 +35,7 @@
 
 #include "types/nrnb.h"
 #include "nb_kernel_f77_single.h"
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 
 
 /* Include standard kernel headers in local directory */
index 0498c519272901cd4190ac3567683c133844c03b..0b3ed0604b850a80f9696fd8e0ae98c5e4b25504 100644 (file)
@@ -39,7 +39,7 @@
 
 #include <stdio.h>
 
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 
 void
 nb_kernel_setup_f77_single(FILE *fplog,nb_kernel_t **list);
index 8d77ad11af0e87cc489a0a416837b50ffe90dec1..5682952996ab99ca8f31df2923f9aa0b30bcf7a6 100644 (file)
@@ -35,7 +35,7 @@
 
 #include "types/nrnb.h"
 #include "nb_kernel_power6.h"
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 
 
 /* Include standard kernel headers in local directory */
index 213280d07867bbbb2d0dc89f91edc951338a5a8f..e2d6c5c2abe5c982c71bbab64b5a10d3eaa358cc 100644 (file)
@@ -39,7 +39,7 @@
 
 #include <stdio.h>
 
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 
 void
 nb_kernel_setup_power6(FILE *fplog,nb_kernel_t **list);
index bd94698769803db94128cdf3c73694f26d0b1c76..7954df51ae5400522892bfc9aaaed3cd3f93d1c8 100644 (file)
@@ -23,7 +23,7 @@
 #include <gmx_sse2_double.h>
 
 /* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 
 
 void nb_kernel400_sse2_double(int *           p_nri,
index 11ec2e97e716552bd9b4ca990f202e2240df69c3..780c941b546a59a25b2611a2a71a8e77b0af32da 100644 (file)
@@ -23,7 +23,7 @@
 #include <gmx_sse2_double.h>
 
 /* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 
 
 
index 3c11023c37d97bf248062bcdb0d634526d342908..85c0832aefcdf478f7edf62d48ef43e5dd90b119 100644 (file)
@@ -23,7 +23,7 @@
 #include <gmx_sse2_double.h>
 
 /* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 
 
 void nb_kernel430_sse2_double(int *           p_nri,
index 7b0b6a0af0f4c58d84f3d4139af5de9f893d1cf0..4626eba996a69576407d90aaa30f8bc0cbd0ef23 100644 (file)
@@ -36,7 +36,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 #include "nb_kernel_sse2_double.h"
 
 static nb_kernel_t *
index 1c6735c53a7f76d95dcfaecfe03f0147a114231a..c6e1a6615d6b3ca481b45db84f64fd064e680619 100644 (file)
@@ -27,7 +27,7 @@
 
 #include <types/simple.h>
 
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 
 #ifdef __cplusplus
 extern "C" {
index 33989e3222fef962b9fead59b18b6df191bf8ef3..e11930e1d282a718d9bb47b5b413eb924f9128a6 100644 (file)
@@ -23,7 +23,7 @@
 #include <gmx_sse2_single.h>
 
 /* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 
 
 void nb_kernel400_sse2_single(int *           p_nri,
index 4b567fa0f36a8a1e6b082f0735ccf2492054533c..18b99ec7e7b631d0a58d4ed9cab0715dc721d374 100644 (file)
@@ -23,7 +23,7 @@
 #include <gmx_sse2_single.h>
 
 /* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 
 
 void nb_kernel410_sse2_single(int *           p_nri,
index 7e3102350cbe58b503a3341cd4de37c3ce95c4da..c06c1452ef6d21dcb4caf40949aade89277b07ac 100644 (file)
@@ -24,7 +24,7 @@
 #include <gmx_sse2_single.h>
 
 /* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 
 
 void nb_kernel430_sse2_single(int *           p_nri,
index 4daf48c015145ce79b7d54e4e488e0f71ff0fb71..6a2d881c15f92a4363accaaed778cf7fd89273ea 100644 (file)
@@ -41,7 +41,7 @@
 #include <intrin.h>
 #endif
 
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 #include "nb_kernel_sse2_single.h"
 
 static nb_kernel_t *
index fa70a01244565353f4af27b80ee9c3f7607e1c97..29412ffb4d76f7db85bb92fffe7e8819e81e4465 100644 (file)
@@ -27,7 +27,7 @@
 
 #include <types/simple.h>
 
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
 #include "nb_kernel_allvsall_sse2_single.h"
 #include "nb_kernel_allvsallgb_sse2_single.h"
 
diff --git a/src/gmxlib/nonbonded/nb_kerneltype.h b/src/gmxlib/nonbonded/nb_kerneltype.h
deleted file mode 100644 (file)
index 6c8e443..0000000
+++ /dev/null
@@ -1,381 +0,0 @@
-/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- 
- *
- * Gromacs 4.0                         Copyright (c) 1991-2003
- * David van der Spoel, Erik Lindahl, University of Groningen.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org
- * 
- * And Hey:
- * Gnomes, ROck Monsters And Chili Sauce
- */
-
-#ifndef _NB_KERNEL_H_
-#define _NB_KERNEL_H_
-
-#include <types/simple.h>
-
-/** \file
- *  \brief Function interfaces and nonbonded kernel meta-data.
- *
- *  \internal
- *
- *  This file contains the definitions of the call sequences and 
- *  parameter documetation for the nonbonded kernels, both the 
- *  optimized level1/level2 kernels in Fortran or C, and the more 
- *  general level0 normal and free energy kernels. 
- *  It also defines an information structure used to record
- *  data about nonbonded kernels, their type of call sequence, 
- *  and the number of flops used in the outer and inner loops.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-} /* fixes auto-indentation problems */
-#endif
-
-
-
-
-/* Temporary structure to be able to pass 2 GB data through the
- * work data pointer argument.
- */
-typedef struct 
-{
-       real      gb_epsilon_solvent;  /* Epsilon for solvent */
-       real      epsilon_r;           /* Epsilon for inner dielectric */
-       real *    gpol;                /* Polarization energy group */
-} gmx_gbdata_t;
-       
-       
-/** Interface to level1 optimized nonbonded kernels.
- * 
- *  \internal
- *
- *  To simplify the jungle of nonbonded function calls in earlier
- *  versions of Gromacs, we have changed most nonbonded kernels
- *  to use the same call sequence, even if some of the parameters
- *  are ununsed in a particular kernel.
- *
- *  This function call sequence is used by all level1 kernels. When this is
- *  written it is actually used by the level2 kernels too, but that might
- *  change in the future - don't assume they are identical.
- *  All arguments are passed as pointers to make to interface identical across 
- *  the Fortran and C implementations. For the same reason (Fortran & assembly) 
- *  we cannot use structures.
- *
- *  The nonbonded kernels should never be called directly - use
- *  the setup and wrapper routines in gmx_nonbonded.h instead.
- *
- *  \param nri        Number of neighborlists/i-particles/outer-particles.
- *                    The outer loop of the nonbonded kernel will iterate
- *                    over these indices. This number must always
- *                    be provided,
- *  \param iinr       Atom number (starting on 0) for each index 0..(nri-1)
- *                    in the outer loop. These are the 'owner'/'parent' 
- *                    particles of each neighborlist. It is quite common
- *                    that a particle occurs several times with different
- *                    shift indices. This is the field 'ilist' in 
- *                    the neighborlist. This array must always be provided.
- *  \param jindex     This array specifies the index  of the first j neighbor
- *                    in the array jjnr[] which belongs to the corresponding
- *                    i atom. The j neighbors for all lists are merged into
- *                    one continous array, so we also get the first index for
- *                    the next list in the next element. The size of this list
- *                    is nri+1, and the last position contains the number of
- *                    entries in the j list. Confused? It is actually pretty
- *                    straightforward; for index <tt>i</tt> the i atom is 
- *                    <tt>iinr[i]</tt>, and the atom indices of the neighbors 
- *                    to interact with are in positions <tt>jindex[i]</tt> 
- *                    through <tt>(jindex[i+1]-1)</tt> in <tt>jjnr[]</tt>.
- *                    This corresponds to ilist_jindex in the neighborlist.
- *                    This array must always be provided.
- *  \param jjnr       Array with the j particle neighbors of list i in positions
- *                    <tt>jindex[i]</tt> through <tt>(jindex[i+1]-1)</tt>. This
- *                    field is called jlist in the neighborlist structure.
- *                    This array must always be provided.
- *  \param shift      Array with shift vector index corresponding to each 
- *                    iinr neighborlist. In most codes, periodic boundary 
- *                    conditions are applied by checking pairwise distances 
- *                    of particles, and correcting differences of more than 
- *                    half a box. This is costly (latencies) and unnecessary 
- *                    since we already decided which periodic copy to use during
- *                    neighborsearching. We solve this by introducing 
- *                    \a shiftvectors and a \a shiftindex. The shiftvectors is 
- *                    normally an array of 3*3*3 vectors, corresponding to 
- *                    displacements +- one box unit in one or more directions.
- *                    The central box (no displacement) is always in the middle,
- *                    index 13 in the case of 3*3*3 shift vectors.
- *                    Imagine a particle interacting with two particles in the
- *                    current box, and three particles in the box to the left. 
- *                    We represent this with two ilist entries, one with shift 
- *                    index=13 (central box) and one with shift index=12. 
- *                    all neighbors in each sublist will have the same shift,  
- *                    so in the nonbonded interaction, we only have to add the 
- *                    shift vector to the outer (i) particle coordinates before
- *                    starting the loop over neighbors. This extracts periodic 
- *                    boundary condition calculations from the inner loops, 
- *                    increasing performance significantly. This array must
- *                    always be provided, but if you do not use any periodic
- *                    boundary conditions you could set all elements to zero
- *                    and provide a shiftvec array of length three, with all
- *                    elements there too being zero.
- *  \param shiftvec   The shift vectors for each index. Since the code needs to
- *                    interface with Fortran or assembly, we have to use 
- *                    pointers to float/double instead of the gmx_rvec_t type.
- *                    The x/y/z shift distances for shift index idx are in
- *                    <tt>shiftvec[3*idx]</tt>, <tt>shiftvec[3*idx+1]</tt>, 
- *                    and <tt>shiftvec[3*idx+2]</tt>. This is fully binary 
- *                    compatible with an array of gmx_rvec_t, so you can simply
- *                    use a typecast when calling. The length of the array is
- *                    three times the number of shift indices. This array
- *                    must be provided, but see the comment for shift.
- *  \param fshift     Forces from 'other periodic boxes' for each shift index.
- *                    The shift concept does not affect the forces on individual
- *                    atoms, but it will have an effect on the virial. To 
- *                    account for this, we add the total force on the i particle 
- *                    in each outer loop to the current shift index in this 
- *                    array, which has exactly the same dimensions as the 
- *                    shiftvec array above. The Gromacs manual describes how 
- *                    this is used to calculate the effect in the virial.
- *                    Dimension is identical to shiftvec.
- *                    For kernels that do not calculate forces this can be NULL.
- *  \param gid        Energy group index for each i neighborlist index; this
- *                    corresponds to ilist_groupindex in the neighborlist and is
- *                    used to decompose energies into groupwise contributions.
- *                    If an i particle belongs to group A, and has interactions 
- *                    both with groups A,B, and C, you will get three different 
- *                    i list entries with energy group indices corresponding to 
- *                    A-A, A-B, and A-C neighbors, in addition to possible
- *                    copies due to different shift indices. The energies will 
- *                    be added to this index (i.e. index=gid[i]) in the vc and 
- *                    vnb arrays. This array must always be provided.
- *                    If you don't want to decompose energies all groups should 
- *                    be set to index 0 (length or the array nri).
- *  \param pos        Coordinates of atoms, starting on 0 as all arrays. To
- *                    interface with fortran and assembly this is a simple list
- *                    of floats, with x/y/z of atom n in positions
- *                    <tt>pos[3*n]</tt>, <tt>pos[3*n+1]</tt>, and
- *                    <tt>pos[3*n+2]</tt>. This is binary compatible with,
- *                    and can be typecast from, an array of gmx_rvec_t.
- *                    This array must always be provided.
- *  \param faction    Forces of atoms. To interface with fortran and assembly 
- *                    this is a simple list of floats, with x/y/z of atom n in 
- *                    positions <tt>pos[3*n]</tt>, <tt>pos[3*n+1]</tt>, and
- *                    <tt>pos[3*n+2]</tt>. This is binary compatible with,
- *                    and can be typecast from, an array of gmx_rvec_t.
- *                    For kernels that do not calculate forces this can be NULL.
- *  \param charge     Array with the charge of atom n in position n.
- *                    If you are calling a kernel without coulomb interaction
- *                    this parameter can safely be set to NULL.
- *  \param facel      Factor to multiply all electrostatic interactions with;
- *                    this is essentially 1/(4*pi*epsilon). For kernels without
- *                    coulomb interaction the value will not be used. In the
- *                    fortran version (pass-by-reference) it can be NULL in
- *                    that case.
- *  \param krf        The 'k' constant for reaction-field electrostatic
- *                    interactions. Can be zero/NULL for non-RF interactions.
- *  \param crf        The 'c' constant for reaction-field electrostatic
- *                    interactions. Can be zero/NULL for non-RF interactions.
- *  \param vc         List of Coulomb energies, the length is equal to the
- *                    number of energy group combinations. As described for
- *                    gid above, the coulomb energy of each outer/i-particle
- *                    list will be added to vc[gid[i]]. Can be NULL for
- *                    kernels without electrostatic interaction.
- *  \param type       Array with the index of the Van der Waals type for each
- *                    atom, starting on 0 . This is used to look up the VdW 
- *                    parameters. When no VdW interactions are calculated 
- *                    (i.e. only Coulomb), this can be set to NULL. 
- *  \param ntype      Number of VdW types, used to look up the VdW parameters. 
- *                    This parameter can be zero (NULL for pass-by-reference)
- *                    if the kernel does not include VdW interactions.
- *  \param vdwparam   Array with Van der Waals parameters. The contents and 
- *                    size depends on the number of parameters required for
- *                    the selected VdW interaction (3 for B-ham, 2 for LJ, etc).
- *                    There are a total of ntype*ntype pair combinations, and 
- *                    the size of this array is the number of such pairs times
- *                    the number of parameters.
- *                    With 'nparam' parameters, and 'ntype' types in total,
- *                    the first parameter for interactions between atoms of VdW 
- *                    type 'ti' and VdW type 'tj' is located in
- *                    <tt>vdwparam[nparam*(ntype*ti+tj)]</tt>, and the 
- *                    additional parameters follow directly in the next 
- *                    position(s). Note that the array corresponds to a 
- *                    symmetric matrix - you should get the same parameters if
- *                    you swap ti and tj. This parameter can be NULL 
- *                    if the kernel does not include VdW interactions.
- *  \param vvdw       List of Van der Waals energies, the length is equal to 
- *                    the number of energy group combinations. As described 
- *                    for gid above, the coulomb energy of each outer list
- *                    will be added to vc[gid[i]]. Can be NULL for
- *                    kernels without electrostatic interaction.
- *  \param tabscale   Distance between table points in the vftab table.
- *                    This is always the same for Coulomb and VdW when both
- *                    are tabulated. If the kernel does not include any
- *                    tabulated interactions it can be zero, or NULL when
- *                    passed-by-reference.
- *  \param vftab      Table data for Coulomb and/or VdW interactions. 
- *                    Each 'point' in the table consists of the four 
- *                    floating-point numbers Y,F,G,H as described in the
- *                    Gromacs manual. If the kernel only tabulates the 
- *                    Coulomb interaction this is followed by the next
- *                    point. If both Coulomb and Van der Waals interactions 
- *                    are tabulated it is instead followed by another
- *                    four numbers for the dispersion table at the same
- *                    point, and then finally the repulsion table, i.e.
- *                    a total of 12 numbers per table point. If only 
- *                    Van der Waals interactions are tabulated the 
- *                    Dispersion and Repulsion table (in that order) use
- *                    a total of 8 floating-point numbers per point.
- *                    This array only needs to be provided for kernels with
- *                    tabulated interactions - otherwise it can be NULL.
- *  \param invsqrta   Array with the inverse square root of the born radius
- *                    of each atom. This can safely be NULL when the kernel
- *                    does not calculate Generalized Born interactions.
- *  \param dvda       Array where the derivative of the potential with respect
- *                    to the born radius for each atom will be written. This
- *                    is necessary to calculate the effect of born radius
- *                    derivatives on forces. However, just as for the force
- *                    arrays it must be provided even when not calculating
- *                    forces, since an implementation might not provide a
- *                    non-force version of the routine. When the kernel does
- *                    not calculate Generalized Born interactions it can 
- *                    safely be set to NULL, though.
- *  \param gbtabscale Distance between (scaled) table points for tabulated 
- *                    Generalized Born interactions. If the kernel does not 
- *                    calculate Generalized Born interactions it can be zero, 
- *                    or NULL when passed-by-reference. Note that this is 
- *                    usually different from the standard (non-GB) table scale.
- *  \param gbtab      Table data for Generalized Born Coulomb interactions.
- *                    Since these interactions contain parameters that 
- *                    enter in a non-trivial way I have introduced a trick
- *                    where the tabulated interaction is a function of
- *                    the distance scaled with the square roots of the two
- *                    born radii. Apart from this it contains similar 
- *                    data (Y,F,G,H) as the normal Coulomb/VdW tables.
- *                    See comments in the table code for details. This
- *                    can safely be set to NULL if you are not using
- *                    Generalized Born interactions.
- *  \param nthreads   Number of threads calling the kernel concurrently.
- *                    This value is only used to optimize the chunk size
- *                    of the neighborlists processed by each thread, so
- *                    it won't cause incorrect results if you get it wrong.
- *  \param count      Pointer to a common counter to synchronize 
- *                    multithreaded execution. This is normally the counter
- *                    in the neighborlist. It must be set to zero before
- *                    calling the kernel to get correct results. Note that
- *                    this is necessary even for a single thread if Gromacs
- *                    is compiled with thread support (meaning it is
- *                    a mandatory parameter).
- *  \param mtx        Pointer to a mutex protecting the counter, masked as 
- *                    a pointer-to-void since thread support is optional. 
- *                    However, if Gromacs is compiled with threads you \a must 
- *                    provide a mutex - it would hurt performance to check
- *                    it for each iteration in the nonbonded kernel. The
- *                    mutex is normally created automatically by the 
- *                    neighborlist initialization routine gmx_nlist_init().
- *                    (Yet another reason not to call kernels directly).
- *  \param outeriter  The number of iterations performed in the outer loop
- *                    of the kernel will be written to this integer. If
- *                    multiple threads are calling the loop this will be
- *                    the part of nri handled by this thread. 
- *                    This value is only used to get accurate flop accounting.
- *                    Since we cannot check for NULL pointers in Fortran it must
- *                    always be provided, but you can discard
- *                    the result if you don't need it.
- *  \param inneriter  The number of iterations performed in the inner loop
- *                    of the kernel will be written to this integer; compare
- *                    with nouter. Since we cannot check for NULL pointers in
- *                    Fortran it must always be provided, but you can discard
- *                    the result if you don't need it.
- *  \param work       Double precision work array - to be used for optimization.
- *
- *  \warning          There is \a very little error control inside
- *                    the nonbonded kernels in order to maximize performance.
- *                    It cannot be overemphasized that you should never call
- *                    them directly, but rely on the higher-level routines.
- */
-typedef void
-nb_kernel_t(int *             nri,
-            int *             iinr,
-            int *             jindex,
-            int *             jjnr,
-            int *             shift,
-            real *            shiftvec,
-            real *            fshift,
-            int *             gid,
-            real *            pos,
-            real *            faction,
-            real *            charge,
-            real *            facel,
-            real *            krf,
-            real *            crf,
-            real *            vc,
-            int *             type,
-            int *             ntype,
-            real *            vdwparam,
-            real *            vvdw,
-            real *            tabscale,
-            real *            vftab,
-            real *            invsqrta,
-            real *            dvda,
-            real *            gbtabscale,
-            real *            gbtab,
-            int *             nthreads,
-            int *             count,
-            void *            mtx,
-            int *             outeriter,
-            int *             inneriter,
-            real *            work);
-
-
-
-typedef void
-nb_adress_kernel_t(int *             nri,
-            int *             iinr,
-            int *             jindex,
-            int *             jjnr,
-            int *             shift,
-            real *            shiftvec,
-            real *            fshift,
-            int *             gid,
-            real *            pos,
-            real *            faction,
-            real *            charge,
-            real *            facel,
-            real *            krf,
-            real *            crf,
-            real *            vc,
-            int *             type,
-            int *             ntype,
-            real *            vdwparam,
-            real *            vvdw,
-            real *            tabscale,
-            real *            vftab,
-            real *            invsqrta,
-            real *            dvda,
-            real *            gbtabscale,
-            real *            gbtab,
-            int *             nthreads,
-            int *             count,
-            void *            mtx,
-            int *             outeriter,
-            int *             inneriter,
-            real              force_cap,
-            real *            wf);
-
-
-
-#ifdef __cplusplus
-}
-#endif
-
-
-#endif /* _NB_KERNEL_H_ */
index d0120c03fa88385b586da6327ca2be438573df8e..8efe33e40fc88ed09b640b80e1d31c2e12d0e791 100644 (file)
 #include <config.h>
 #endif
 
-#ifdef GMX_THREAD_SHM_FDECOMP
+#ifdef GMX_THREAD_MPI
 #include <thread_mpi.h>
 #endif
 
-
 #include <stdio.h>
 #include <stdlib.h>
 #include "typedefs.h"
@@ -51,6 +50,7 @@
 #include "vec.h"
 #include "maths.h"
 #include "macros.h"
+#include "string2.h"
 #include "force.h"
 #include "names.h"
 #include "main.h"
 #include "smalloc.h"
 #include "nonbonded.h"
 
+#include "nb_kernel.h"
 #include "nb_kernel_c/nb_kernel_c.h"
-#include "nb_kernel_adress_c/nb_kernel_c_adress.h"
+
 #include "nb_free_energy.h"
 #include "nb_generic.h"
 #include "nb_generic_cg.h"
 #include "nb_generic_adress.h"
 
 
-/* 1,4 interactions uses kernel 330 directly */
-#include "nb_kernel_c/nb_kernel330.h" 
-#include "nb_kernel_adress_c/nb_kernel330_adress.h"
-
-
-
-#if 0 && defined (GMX_X86_SSE2)
-#  ifdef GMX_DOUBLE
-#    include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
-#  else
-#    include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
-#  endif
-#endif
 
-#if defined(GMX_FORTRAN)
-#  ifdef GMX_DOUBLE
-#    include "nb_kernel_f77_double/nb_kernel_f77_double.h"
-#  else
-#    include "nb_kernel_f77_single/nb_kernel_f77_single.h"
-#  endif
+#ifdef GMX_THREAD_MPI
+static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
 #endif
+static gmx_bool            nonbonded_setup_done  = FALSE;
 
 
-#ifdef GMX_POWER6
-#include "nb_kernel_power6/nb_kernel_power6.h"
+void
+gmx_nonbonded_setup(FILE *         fplog,
+                    t_forcerec *   fr,
+                    gmx_bool       bGenericKernelOnly)
+{
+#ifdef GMX_THREAD_MPI
+    tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
 #endif
+    /* Here we are guaranteed only one thread made it. */
+    if(nonbonded_setup_done==FALSE)
+    {
+        if(bGenericKernelOnly==FALSE)
+        {
+            /* Add the generic kernels to the structure stored statically in nb_kernel.c */
 
-#ifdef GMX_BLUEGENE
-#include "nb_kernel_bluegene/nb_kernel_bluegene.h"
-#endif
+            /* Add interaction-specific C kernels */
+            nb_kernel_list_add_kernels(kernellist_c,kernellist_c_size);
 
+            /* Add interaction-specific accelerated SSE kernels, etc. here */
+        }
+        /* Create a hash for faster lookups */
+        nb_kernel_list_hash_init();
 
+        nonbonded_setup_done=TRUE;
+    }
+#ifdef GMX_THREAD_MPI
+    tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
+#endif
+}
 
-enum { TABLE_NONE, TABLE_COMBINED, TABLE_COUL, TABLE_VDW, TABLE_NR };
 
 
-/* Table version for each kernel.
- */
-static const int 
-nb_kernel_table[eNR_NBKERNEL_NR] = 
+void
+gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
 {
-  TABLE_NONE,     /* kernel010 */
-  TABLE_NONE,     /* kernel020 */
-  TABLE_VDW,      /* kernel030 */
-  TABLE_NONE,     /* kernel100 */
-  TABLE_NONE,     /* kernel101 */
-  TABLE_NONE,     /* kernel102 */
-  TABLE_NONE,     /* kernel103 */
-  TABLE_NONE,     /* kernel104 */
-  TABLE_NONE,     /* kernel110 */
-  TABLE_NONE,     /* kernel111 */
-  TABLE_NONE,     /* kernel112 */
-  TABLE_NONE,     /* kernel113 */
-  TABLE_NONE,     /* kernel114 */
-  TABLE_NONE,     /* kernel120 */
-  TABLE_NONE,     /* kernel121 */
-  TABLE_NONE,     /* kernel122 */
-  TABLE_NONE,     /* kernel123 */
-  TABLE_NONE,     /* kernel124 */
-  TABLE_VDW,      /* kernel130 */
-  TABLE_VDW,      /* kernel131 */
-  TABLE_VDW,      /* kernel132 */
-  TABLE_VDW,      /* kernel133 */
-  TABLE_VDW,      /* kernel134 */
-  TABLE_NONE,     /* kernel200 */
-  TABLE_NONE,     /* kernel201 */
-  TABLE_NONE,     /* kernel202 */
-  TABLE_NONE,     /* kernel203 */
-  TABLE_NONE,     /* kernel204 */
-  TABLE_NONE,     /* kernel210 */
-  TABLE_NONE,     /* kernel211 */
-  TABLE_NONE,     /* kernel212 */
-  TABLE_NONE,     /* kernel213 */
-  TABLE_NONE,     /* kernel214 */
-  TABLE_NONE,     /* kernel220 */
-  TABLE_NONE,     /* kernel221 */
-  TABLE_NONE,     /* kernel222 */
-  TABLE_NONE,     /* kernel223 */
-  TABLE_NONE,     /* kernel224 */
-  TABLE_VDW,      /* kernel230 */
-  TABLE_VDW,      /* kernel231 */
-  TABLE_VDW,      /* kernel232 */
-  TABLE_VDW,      /* kernel233 */
-  TABLE_VDW,      /* kernel234 */
-  TABLE_COUL,     /* kernel300 */
-  TABLE_COUL,     /* kernel301 */
-  TABLE_COUL,     /* kernel302 */
-  TABLE_COUL,     /* kernel303 */
-  TABLE_COUL,     /* kernel304 */
-  TABLE_COUL,     /* kernel310 */
-  TABLE_COUL,     /* kernel311 */
-  TABLE_COUL,     /* kernel312 */
-  TABLE_COUL,     /* kernel313 */
-  TABLE_COUL,     /* kernel314 */
-  TABLE_COUL,     /* kernel320 */
-  TABLE_COUL,     /* kernel321 */
-  TABLE_COUL,     /* kernel322 */
-  TABLE_COUL,     /* kernel323 */
-  TABLE_COUL,     /* kernel324 */
-  TABLE_COMBINED, /* kernel330 */
-  TABLE_COMBINED, /* kernel331 */
-  TABLE_COMBINED, /* kernel332 */
-  TABLE_COMBINED, /* kernel333 */
-  TABLE_COMBINED, /* kernel334 */
-  TABLE_NONE,     /* kernel400 */
-  TABLE_NONE,     /* kernel410 */
-  TABLE_VDW       /* kernel430 */
-};
-
+    const char *     elec;
+    const char *     elec_mod;
+    const char *     vdw;
+    const char *     vdw_mod;
+    const char *     geom;
+    const char *     other;
+    const char *     vf;
+
+    const char *     archs[] = { "c" };
+    int              i;
+
+    if(nonbonded_setup_done==FALSE)
+    {
+        /* We typically call this setup routine before starting timers,
+         * but if that has not been done for whatever reason we do it now.
+         */
+        gmx_nonbonded_setup(NULL,NULL,FALSE);
+    }
 
+    /* Not used yet */
+    other="";
 
-static nb_kernel_t **
-nb_kernel_list = NULL;
+    nl->kernelptr_vf = NULL;
+    nl->kernelptr_v  = NULL;
+    nl->kernelptr_f  = NULL;
 
-static nb_adress_kernel_t **
-nb_kernel_list_adress = NULL;
+    elec     = gmx_nbkernel_elec_names[nl->ielec];
+    elec_mod = eintmod_names[nl->ielecmod];
+    vdw      = gmx_nbkernel_vdw_names[nl->ivdw];
+    vdw_mod  = eintmod_names[nl->ivdwmod];
+    geom     = gmx_nblist_geometry_names[nl->igeometry];
 
-void
-gmx_setup_kernels(FILE *fplog,t_forcerec *fr,gmx_bool bGenericKernelOnly)
-{
-    int i;
-        
-    snew(nb_kernel_list,eNR_NBKERNEL_NR);
-    
-    /* Note that later calls overwrite earlier, so the preferred (fastest)
-     * version should be at the end. For instance, we call SSE after 3DNow.
-     */
-    
-    for(i=0; i<eNR_NBKERNEL_NR; i++)
-    {
-        nb_kernel_list[i] = NULL;
-    }
-    
-    if (bGenericKernelOnly)
-    {
-        return;
-    }
-       
-       if(fplog)
+    if(nl->free_energy)
     {
-           fprintf(fplog,"Configuring nonbonded kernels...\n");
+        nl->kernelptr_vf = gmx_nb_free_energy_kernel;
+        nl->kernelptr_f  = gmx_nb_free_energy_kernel;
     }
-       
-    nb_kernel_setup(fplog,nb_kernel_list);
-    
-    if(fr->use_cpu_acceleration==FALSE)
+    else if(!gmx_strcasecmp_min(geom,"CG-CG"))
     {
-        return;
+        nl->kernelptr_vf = gmx_nb_generic_cg_kernel;
+        nl->kernelptr_f  = gmx_nb_generic_cg_kernel;
     }
-
-    /* Setup kernels. The last called setup routine will overwrite earlier assignments,
-        * so we should e.g. test SSE3 support _after_ SSE2 support,
-     * and call e.g. Fortran setup before SSE.
-        */
-    
-#if defined(GMX_FORTRAN) && defined(GMX_DOUBLE)   
-    nb_kernel_setup_f77_double(fplog,nb_kernel_list);
-#endif
-       
-#if defined(GMX_FORTRAN) && !defined(GMX_DOUBLE)   
-    nb_kernel_setup_f77_single(fplog,nb_kernel_list);
-#endif
-       
-#ifdef GMX_BLUEGENE
-    nb_kernel_setup_bluegene(fplog,nb_kernel_list);
-#endif
-       
-#ifdef GMX_POWER6
-    nb_kernel_setup_power6(fplog,nb_kernel_list);
-#endif
-       
-       if(fplog)
+    else
     {
-           fprintf(fplog,"\n\n");
-    }
-}
-
-void
-gmx_setup_adress_kernels(FILE *fplog,gmx_bool bGenericKernelOnly) {
-    int i;
+        /* Try to find a specific kernel first */
 
-    snew(nb_kernel_list_adress, eNR_NBKERNEL_NR);
-
-    for (i = 0; i < eNR_NBKERNEL_NR; i++) {
-        nb_kernel_list_adress[i] = NULL;
-    }
+        for(i=0;i<asize(archs) && nl->kernelptr_vf==NULL ;i++)
+        {
+               nl->kernelptr_vf = nb_kernel_list_findkernel(log,archs[i],elec,elec_mod,vdw,vdw_mod,geom,other,"PotentialAndForce");
+        }
+        for(i=0;i<asize(archs) && nl->kernelptr_f==NULL ;i++)
+        {
+            nl->kernelptr_f  = nb_kernel_list_findkernel(log,archs[i],elec,elec_mod,vdw,vdw_mod,geom,other,"Force");
+            /* If there is not force-only optimized kernel, is there a potential & force one? */
+            if(nl->kernelptr_f == NULL)
+            {
+                nl->kernelptr_f  = nb_kernel_list_findkernel(NULL,archs[i],elec,elec_mod,vdw,vdw_mod,geom,other,"PotentialAndForce");
+            }
+        }
 
-    if (bGenericKernelOnly)
-    {
-        return;
+        /* Give up, pick a generic one instead */
+        if(nl->kernelptr_vf==NULL)
+        {
+            nl->kernelptr_vf = gmx_nb_generic_kernel;
+            nl->kernelptr_f  = gmx_nb_generic_kernel;
+            if(log)
+            {
+                fprintf(log,
+                        "WARNING - Slow generic NB kernel used for neighborlist with\n"
+                        "    Elec: '%s', Modifier: '%s'\n"
+                        "    Vdw:  '%s', Modifier: '%s'\n"
+                        "    Geom: '%s', Other: '%s'\n\n",
+                        elec,elec_mod,vdw,vdw_mod,geom,other);
+            }
+        }
     }
 
-    nb_kernel_setup_adress(fplog, nb_kernel_list_adress);
+    return;
 }
 
 void do_nonbonded(t_commrec *cr,t_forcerec *fr,
-                  rvec x[],rvec f[],t_mdatoms *mdatoms,t_blocka *excl,
-                  real egnb[],real egcoul[],real egpol[],rvec box_size,
+                  rvec x[],rvec f_shortrange[],rvec f_longrange[],t_mdatoms *mdatoms,t_blocka *excl,
+                  gmx_grppairener_t *grppener,rvec box_size,
                   t_nrnb *nrnb,real *lambda, real *dvdl,
                   int nls,int eNL,int flags)
 {
-    gmx_bool            bLR,bDoForces,bForeignLambda;
-       t_nblist *      nlist;
-       real *          fshift;
-       int             n,n0,n1,i,i0,i1,nrnb_ind,sz;
-       t_nblists       *nblists;
-       gmx_bool        bWater;
-       FILE *          fp;
-       int             fac=0;
-       int             nthreads = 1;
-       int             tabletype;
-       int             outeriter,inneriter;
-       real *          tabledata = NULL;
-       gmx_gbdata_t    gbdata;
-
-        nb_kernel_t         *kernelptr=NULL;
-        nb_adress_kernel_t  *adresskernelptr=NULL;
+       t_nblist *        nlist;
+       int               n,n0,n1,i,i0,i1,sz,range;
+       t_nblists *       nblists;
+    nb_kernel_data_t  kernel_data;
+    nb_kernel_t *     kernelptr=NULL;
+    rvec *            f;
     
-        gmx_bool        bCG; /* for AdresS */
-        int             k;/* for AdresS */
-
-    bLR            = (flags & GMX_DONB_LR);
-    bDoForces      = (flags & GMX_DONB_FORCES);
-    bForeignLambda = (flags & GMX_DONB_FOREIGNLAMBDA); 
-
-    bCG = FALSE;  /* for AdresS */
-
-       gbdata.gb_epsilon_solvent = fr->gb_epsilon_solvent;
-       gbdata.epsilon_r = fr->epsilon_r;
-       gbdata.gpol               = egpol;
+    kernel_data.flags                   = flags;
+    kernel_data.exclusions              = excl;
+    kernel_data.lambda                  = lambda;
+    kernel_data.dvdl                    = dvdl;
     
-    if (!fr->adress_type==eAdressOff && !bDoForces){
-        gmx_fatal(FARGS,"No force kernels not implemeted for adress");
-    }
-
-    if(fr->bAllvsAll) 
+    if(fr->bAllvsAll)
     {
-        if(fr->bGB)
-        {
-#if 0 && defined (GMX_X86_SSE2)
-# ifdef GMX_DOUBLE
-            if(fr->use_cpu_acceleration)
-            {
-                nb_kernel_allvsallgb_sse2_double(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,egpol,
-                                                 &outeriter,&inneriter,&fr->AllvsAll_work);
-            }
-            else
-            {
-                nb_kernel_allvsallgb(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,egpol,
-                                     &outeriter,&inneriter,&fr->AllvsAll_work);        
-            }
-#  else /* not double */
-            if(fr->use_cpu_acceleration)
-            {
-                nb_kernel_allvsallgb_sse2_single(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,egpol,
-                                                 &outeriter,&inneriter,&fr->AllvsAll_work);
-            }
-            else
-            {
-                nb_kernel_allvsallgb(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,egpol,
-                                     &outeriter,&inneriter,&fr->AllvsAll_work);        
-            }
-#  endif /* double/single alt. */
-#else /* no SSE support compiled in */
-            nb_kernel_allvsallgb(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,egpol,
-                                 &outeriter,&inneriter,&fr->AllvsAll_work);                    
-#endif
-            inc_nrnb(nrnb,eNR_NBKERNEL_ALLVSALLGB,inneriter);
-        }
-        else
-        { 
-#if 0 && defined (GMX_X86_SSE2)
-# ifdef GMX_DOUBLE
-            if(fr->use_cpu_acceleration)
-            {
-                nb_kernel_allvsall_sse2_double(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,
-                                               &outeriter,&inneriter,&fr->AllvsAll_work);
-            }
-            else 
-            {
-                nb_kernel_allvsall(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,
-                                   &outeriter,&inneriter,&fr->AllvsAll_work);            
-            }
-            
-#  else /* not double */
-            if(fr->use_cpu_acceleration)
-            {
-                nb_kernel_allvsall_sse2_single(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,
-                                               &outeriter,&inneriter,&fr->AllvsAll_work);
-            }
-            else 
-            {
-                nb_kernel_allvsall(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,
-                                   &outeriter,&inneriter,&fr->AllvsAll_work);            
-            }
-
-#  endif /* double/single check */
-#else /* No SSE2 support compiled in */
-            nb_kernel_allvsall(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,
-                               &outeriter,&inneriter,&fr->AllvsAll_work);
-#endif            
-            
-            inc_nrnb(nrnb,eNR_NBKERNEL_ALLVSALL,inneriter);
-        }
-        inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,outeriter);
         return;
     }
        
-    if (eNL >= 0) 
+    if (eNL >= 0)
     {
                i0 = eNL;
                i1 = i0+1;
@@ -390,340 +228,163 @@ void do_nonbonded(t_commrec *cr,t_forcerec *fr,
                i1 = eNL_NR;
        }
        
-       if (nls >= 0) 
+       if (nls >= 0)
        {
                n0 = nls;
                n1 = nls+1;
        }
-       else 
+       else
        {
                n0 = 0;
                n1 = fr->nnblists;
        }
-       
-       if(nb_kernel_list == NULL)
-    {
-               gmx_fatal(FARGS,"gmx_setup_kernels has not been called");
-    }
-  
-    fshift = fr->fshift[0];
-  
-       for(n=n0; (n<n1); n++) 
+
+       for(n=n0; (n<n1); n++)
        {
                nblists = &fr->nblists[n];
-               for(i=i0; (i<i1); i++) 
-               {
-                       outeriter = inneriter = 0;
-      
-                       if (bLR) 
-                       {
-                               nlist = &(nblists->nlist_lr[i]);
-                       }
-                       else
-                       {
-                               nlist = &(nblists->nlist_sr[i]);
-                       }
-                       
-                       if (nlist->nri > 0) 
-                       {
-                               nrnb_ind = nlist->il_code;
-                               
-                               if(nrnb_ind==eNR_NBKERNEL_FREE_ENERGY)
-                               {
-                                       /* generic free energy, use combined table */
-                                       tabledata = nblists->tab.tab;
-                               }
-                               else
-                               {
-                    if (bForeignLambda)
-                    {
-                        /* We don't need the non-perturbed interactions */
-                        continue;
-                    }
 
-                                       tabletype = nb_kernel_table[nrnb_ind];
-                                       
-                                       /* normal kernels, not free energy */
-                                       if (!bDoForces)
-                                       {
-                                               nrnb_ind += eNR_NBKERNEL_NR/2;
-                                       }
-                                       
-                                       if(tabletype == TABLE_COMBINED)
-                                       {
-                                               tabledata = nblists->tab.tab;
-                                       }
-                                       else if(tabletype == TABLE_COUL)
-                                       {
-                                               tabledata = nblists->coultab;
-                                       }
-                                       else if(tabletype == TABLE_VDW)
-                                       {
-                                               tabledata = nblists->vdwtab;
-                                       }
-                                       else
-                                       {
-                                               tabledata = NULL;
-                                       }
-                               }
-                               
-                               nlist->count = 0;
-                               
-                               if(nlist->free_energy)
-                               {
-                                       if(nlist->ivdw==enbvdwBHAM)
-                                       {
-                                               gmx_fatal(FARGS,"Cannot do free energy Buckingham interactions.");
-                                       }
-                                       
-                                       gmx_nb_free_energy_kernel(nlist->icoul,
-                                                                                         nlist->ivdw,
-                                                                                         nlist->nri,
-                                                                                         nlist->iinr,
-                                                                                         nlist->jindex,
-                                                                                         nlist->jjnr,
-                                                                                         nlist->shift,
-                                                                                         fr->shift_vec[0],
-                                                                                         fshift,
-                                                                                         nlist->gid,
-                                                                                         x[0],
-                                                                                         f[0],
-                                                                                         mdatoms->chargeA,
-                                                                                         mdatoms->chargeB,
-                                                                                         fr->epsfac,
-                                                                                         fr->k_rf,
-                                                                                         fr->c_rf,
-                                                                                         fr->ewaldcoeff,
-                                                                                         egcoul,
-                                                                                         mdatoms->typeA,
-                                                                                         mdatoms->typeB,
-                                                                                         fr->ntype,
-                                                                                         fr->nbfp,
-                                                                                         egnb,
-                                                                                         nblists->tab.scale,
-                                                                                         tabledata,
-                                                                                         lambda[efptCOUL],
-                                              lambda[efptVDW],
-                                              dvdl,
-                                              fr->sc_alphacoul,
-                                                                                         fr->sc_alphavdw,
-                                                                                         fr->sc_power,
-                                                                                         fr->sc_r_power,
-                                                                                         fr->sc_sigma6_def,
-                                              fr->sc_sigma6_min,
-                                              bDoForces,
-                                                                                         &outeriter,
-                                                                                         &inneriter);
-                }
-                else if (nlist->enlist == enlistCG_CG)
-                {
-                   if (fr->adress_type==eAdressOff){
-                    /* Call the charge group based inner loop */
-                       gmx_nb_generic_cg_kernel(nlist,
-                                                fr,
-                                                mdatoms,
-                                                x[0],
-                                                f[0],
-                                                fshift,
-                                                egcoul,
-                                                egnb,
-                                                nblists->tab.scale,
-                                                tabledata,
-                                                &outeriter,
-                                                &inneriter);
-                   }
-                   else
-                   {
-                       /*gmx_nb_generic_adress_kernel(nlist,
-                                                fr,
-                                                mdatoms,
-                                                x[0],
-                                                f[0],
-                                                fshift,
-                                                egcoul,
-                                                egnb,
-                                                nblists->tab.scale,
-                                                tabledata,
-                                                &outeriter,
-                                                &inneriter);*/
-                          gmx_fatal(FARGS,"Death & horror! Adress cgcg kernel not implemented anymore.\n");
+        kernel_data.table_elec              = &nblists->table_elec;
+        kernel_data.table_vdw               = &nblists->table_vdw;
+        kernel_data.table_elec_vdw          = &nblists->table_elec_vdw;
 
-                   }
+        for(range=0;range<2;range++)
+        {
+            /* Are we doing short/long-range? */
+            if(range==0)
+            {
+                /* Short-range */
+                if(!(flags & GMX_NONBONDED_DO_SR))
+                {
+                    continue;
                 }
-                else
+                kernel_data.energygrp_elec          = grppener->ener[egCOULSR];
+                kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
+                kernel_data.energygrp_polarization  = grppener->ener[egGB];
+                nlist = nblists->nlist_sr;
+                f                                   = f_shortrange;
+            }
+            else if(range==1)
+            {
+                /* Long-range */
+                if(!(flags & GMX_NONBONDED_DO_LR))
                 {
-                    /* AdresS*/
-                    /* for adress we need to determine for each energy group wether it is explicit or coarse-grained */
-                    if (!fr->adress_type == eAdressOff) {                        
-                        bCG = FALSE;
-                        if ( !fr->adress_group_explicit[ mdatoms->cENER[nlist->iinr[0]] ] ){
-                            bCG=TRUE;
-                        }
-                        /* If this processor has only explicit atoms (w=1)
-                          skip the coarse grained force calculation. Same for
-                         only coarsegrained atoms and explicit interactions.
-                         Last condition is to make sure that generic kernel is not
-                         skipped*/
-                        if (mdatoms->pureex && bCG && nb_kernel_list[nrnb_ind] != NULL) continue;
-                        if (mdatoms->purecg && !bCG && nb_kernel_list[nrnb_ind] != NULL) continue;
-                    }
-
-                    if (fr->adress_type == eAdressOff ||
-                            mdatoms->pureex ||
-                            mdatoms->purecg){
-                        /* if we only have to calculate pure cg/ex interactions
-                         we can use the faster standard gromacs kernels*/
-                        kernelptr = nb_kernel_list[nrnb_ind];
-                    }else{
-                        /* This processor has hybrid interactions which means
-                         * we have to
-                         * use our own kernels. We have two kernel types: one that
-                         * calculates the forces with the explicit prefactor w1*w2
-                         * and one for coarse-grained with (1-w1*w2)
-                         * explicit kernels are the second part of the kernel
-                         *  list */
-                        if (!bCG) nrnb_ind += eNR_NBKERNEL_NR/2;                      
-                        adresskernelptr = nb_kernel_list_adress[nrnb_ind];
-                    }
-                    
-                    if (kernelptr == NULL && adresskernelptr == NULL)
-                     {
-                        /* Call a generic nonbonded kernel */
-                        
-                        /* If you want to hack/test your own interactions,
-                         * do it in this routine and make sure it is called
-                         * by setting the environment variable GMX_NB_GENERIC.
-                         */
-                        if (fr->adress_type==eAdressOff){
-
-                        gmx_nb_generic_kernel(nlist,
-                                              fr,
-                                              mdatoms,
-                                              x[0],
-                                              f[0],
-                                              fshift,
-                                              egcoul,
-                                              egnb,
-                                              nblists->tab.scale,
-                                              tabledata,
-                                              &outeriter,
-                                              &inneriter);
-                        }else /* do generic AdResS kernels (slow)*/
-                        {
-
-                            gmx_nb_generic_adress_kernel(nlist,
-                                                fr,
-                                                mdatoms,
-                                                x[0],
-                                                f[0],
-                                                fshift,
-                                                egcoul,
-                                                egnb,
-                                                nblists->tab.scale,
-                                                tabledata,
-                                                &outeriter,
-                                                &inneriter,
-                                                bCG);
-                        }
-
+                    continue;
+                }
+                kernel_data.energygrp_elec          = grppener->ener[egCOULLR];
+                kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
+                kernel_data.energygrp_polarization  = grppener->ener[egGB];
+                nlist = nblists->nlist_lr;
+                f                                   = f_longrange;
+            }
 
+            for(i=i0; (i<i1); i++)
+            {
+                if (nlist[i].nri > 0)
+                {
+                    if(flags & GMX_NONBONDED_DO_POTENTIAL)
+                    {
+                        /* Potential and force */
+                        kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
                     }
                     else
                     {
-                        /* Call nonbonded kernel from function pointer */
-                        if (kernelptr!=NULL){
-                        (*kernelptr)( &(nlist->nri),
-                                      nlist->iinr,
-                                      nlist->jindex,
-                                      nlist->jjnr,
-                                      nlist->shift,
-                                      fr->shift_vec[0],
-                                      fshift,
-                                      nlist->gid,
-                                      x[0],
-                                      f[0],
-                                      mdatoms->chargeA,
-                                      &(fr->epsfac),
-                                      &(fr->k_rf),
-                                      &(fr->c_rf),
-                                      egcoul,
-                                      mdatoms->typeA,
-                                      &(fr->ntype),
-                                      fr->nbfp,
-                                      egnb,
-                                      &(nblists->tab.scale),
-                                      tabledata,
-                                      fr->invsqrta,
-                                      fr->dvda,
-                                      &(fr->gbtabscale),
-                                      fr->gbtab.tab,
-                                      &nthreads,
-                                      &(nlist->count),
-                                      nlist->mtx,
-                                      &outeriter,
-                                      &inneriter,
-                                      (real *)&gbdata);
-                        }else if (adresskernelptr != NULL)
-                        { /* Adress kernels */
-                          (*adresskernelptr)( &(nlist->nri),
-                                      nlist->iinr,
-                                      nlist->jindex,
-                                      nlist->jjnr,
-                                      nlist->shift,
-                                      fr->shift_vec[0],
-                                      fshift,
-                                      nlist->gid,
-                                      x[0],
-                                      f[0],
-                                      mdatoms->chargeA,
-                                      &(fr->epsfac),
-                                      &(fr->k_rf),
-                                      &(fr->c_rf),
-                                      egcoul,
-                                      mdatoms->typeA,
-                                      &(fr->ntype),
-                                      fr->nbfp,
-                                      egnb,
-                                      &(nblists->tab.scale),
-                                      tabledata,
-                                      fr->invsqrta,
-                                      fr->dvda,
-                                      &(fr->gbtabscale),
-                                      fr->gbtab.tab,
-                                      &nthreads,
-                                      &(nlist->count),
-                                      nlist->mtx,
-                                      &outeriter,
-                                      &inneriter,
-                                      fr->adress_ex_forcecap,
-                                      mdatoms->wf);
-                        }
+                        /* Force only, no potential */
+                        kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
                     }
-                }
-                
-                /* Update flop accounting */
-                               
-                               /* Outer loop in kernel */
-                switch (nlist->enlist) {
-                case enlistATOM_ATOM:   fac =  1; break;
-                case enlistSPC_ATOM:    fac =  3; break;
-                case enlistSPC_SPC:     fac =  9; break;
-                case enlistTIP4P_ATOM:  fac =  4; break;
-                case enlistTIP4P_TIP4P: fac = 16; break;
-                case enlistCG_CG:       fac =  1; break;
-                }
-                inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,fac*outeriter);
 
-                /* inner loop in kernel */
-                inc_nrnb(nrnb,nrnb_ind,inneriter);
+                    if(nlist[i].free_energy==0 && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
+                    {
+                        /* We don't need the non-perturbed interactions */
+                        continue;
+                    }
+                    (*kernelptr)(&(nlist[i]),x,f,fr,mdatoms,&kernel_data,nrnb);
+                 }
             }
         }
     }
 }
 
+static void
+nb_listed_warning_rlimit(const rvec *x,int ai, int aj,int * global_atom_index,real r, real rlimit)
+{
+    gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
+                "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
+                "This is likely either a 1,4 interaction, or a listed interaction inside\n"
+                "a smaller molecule you are decoupling during a free energy calculation.\n"
+                "Since interactions at distances beyond the table cannot be computed,\n"
+                "they are skipped until they are inside the table limit again. You will\n"
+                "only see this message once, even if it occurs for several interactions.\n\n"
+                "IMPORTANT: This should not happen in a stable simulation, so there is\n"
+                "probably something wrong with your system. Only change the table-extension\n"
+                "distance in the mdp file if you are really sure that is the reason.\n",
+                glatnr(global_atom_index,ai),glatnr(global_atom_index,aj),r,rlimit);
+
+    if (debug)
+    {
+        fprintf(debug,
+                "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
+                x[ai][XX],x[ai][YY],x[ai][ZZ],x[aj][XX],x[aj][YY],x[aj][ZZ],
+                glatnr(global_atom_index,ai),glatnr(global_atom_index,aj),r);
+    }
+}
+
+
 
-real 
-do_listed_vdw_q(int ftype,int nbonds,
+/* This might logically belong better in the nb_generic.c module, but it is only
+ * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
+ * extra functional call for every single pair listed in the topology.
+ */
+static real
+nb_evaluate_single(real r2, real tabscale,real *vftab,
+                   real qq, real c6, real c12, real *velec, real *vvdw)
+{
+    real       rinv,r,rtab,eps,eps2,Y,F,Geps,Heps2,Fp,VVe,FFe,VVd,FFd,VVr,FFr,fscal;
+    int        ntab;
+
+    /* Do the tabulated interactions - first table lookup */
+    rinv             = gmx_invsqrt(r2);
+    r                = r2*rinv;
+    rtab             = r*tabscale;
+    ntab             = rtab;
+    eps              = rtab-ntab;
+    eps2             = eps*eps;
+    ntab             = 12*ntab;
+    /* Electrostatics */
+    Y                = vftab[ntab];
+    F                = vftab[ntab+1];
+    Geps             = eps*vftab[ntab+2];
+    Heps2            = eps2*vftab[ntab+3];
+    Fp               = F+Geps+Heps2;
+    VVe              = Y+eps*Fp;
+    FFe              = Fp+Geps+2.0*Heps2;
+    /* Dispersion */
+    Y                = vftab[ntab+4];
+    F                = vftab[ntab+5];
+    Geps             = eps*vftab[ntab+6];
+    Heps2            = eps2*vftab[ntab+7];
+    Fp               = F+Geps+Heps2;
+    VVd              = Y+eps*Fp;
+    FFd              = Fp+Geps+2.0*Heps2;
+    /* Repulsion */
+    Y                = vftab[ntab+8];
+    F                = vftab[ntab+9];
+    Geps             = eps*vftab[ntab+10];
+    Heps2            = eps2*vftab[ntab+11];
+    Fp               = F+Geps+Heps2;
+    VVr              = Y+eps*Fp;
+    FFr              = Fp+Geps+2.0*Heps2;
+
+    *velec           = qq*VVe;
+    *vvdw            = c6*VVd+c12*VVr;
+
+    fscal            = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
+
+    return fscal;
+}
+
+
+real
+do_nonbonded_listed(int ftype,int nbonds,
                 const t_iatom iatoms[],const t_iparams iparams[],
                 const rvec x[],rvec f[],rvec fshift[],
                 const t_pbc *pbc,const t_graph *g,
@@ -732,371 +393,171 @@ do_listed_vdw_q(int ftype,int nbonds,
                 const t_forcerec *fr,gmx_grppairener_t *grppener,
                 int *global_atom_index)
 {
-    static    gmx_bool bWarn=FALSE;
-    real      eps,r2,*tab,rtab2=0;
-    rvec      dx,x14[2],f14[2];
-    int       i,ai,aj,itype;
-    int       typeA[2]={0,0},typeB[2]={0,1};
-    real      chargeA[2]={0,0},chargeB[2];
-    int       gid,shift_vir,shift_f;
-    int       j_index[] = { 0, 1 };
-    int       i0=0,i1=1,i2=2;
-    ivec      dt;
-    int       outeriter,inneriter;
-    int       nthreads = 1;
-    int       count;
-    real      krf,crf,tabscale;
-    int       ntype=0;
-    real      *nbfp=NULL;
-    real      *egnb=NULL,*egcoul=NULL;
-    t_nblist  tmplist;
-    int       icoul,ivdw;
-    gmx_bool      bMolPBC,bFreeEnergy;
+    int              ielec,ivdw;
+    real             qq,c6,c12;
+    rvec             dx;
+    ivec             dt;
+    int              i,j,itype,ai,aj,gid;
+    int              fshift_index;
+    real             r2,rinv;
+    real             fscal,velec,vvdw;
+    real *           energygrp_elec;
+    real *           energygrp_vdw;
+    static gmx_bool  warned_rlimit=FALSE;
+    /* Free energy stuff */
+    gmx_bool         bFreeEnergy;
+    real             LFC[2],LFV[2],DLF[2],lfac_coul[2],lfac_vdw[2],dlfac_coul[2],dlfac_vdw[2];
+    real             qqB,c6B,c12B,sigma2_def,sigma2_min;
     
-    gmx_bool      bCG; /* AdResS*/
-    real      wf14[2]={0,0}; /* AdResS*/
-   
-#if GMX_THREAD_SHM_FDECOMP
-    pthread_mutex_t mtx;
-#else
-    void *    mtx = NULL;
-#endif
-
     
-#if GMX_THREAD_SHM_FDECOMP
-    pthread_mutex_initialize(&mtx);
-#endif
-
-    bMolPBC = fr->bMolPBC;
-
     switch (ftype) {
-    case F_LJ14:
-    case F_LJC14_Q:
-        eps = fr->epsfac*fr->fudgeQQ;
-        ntype  = 1;
-        egnb   = grppener->ener[egLJ14];
-        egcoul = grppener->ener[egCOUL14];
-        break;
-    case F_LJC_PAIRS_NB:
-        eps = fr->epsfac;
-        ntype  = 1;
-        egnb   = grppener->ener[egLJSR];
-        egcoul = grppener->ener[egCOULSR];
-        break;
-    default:
-        gmx_fatal(FARGS,"Unknown function type %d in do_nonbonded14",
-                  ftype);
-    }
-    tab = fr->tab14.tab;
-    rtab2 = sqr(fr->tab14.r);
-    tabscale = fr->tab14.scale;
-
-    krf = fr->k_rf;
-    crf = fr->c_rf;
-
-    /* Determine the values for icoul/ivdw. */
-    if (fr->bEwald) {
-        icoul = enbcoulOOR;
-    } 
-    else if(fr->bcoultab)
-    {
-        icoul = enbcoulTAB;
-    }
-    else if(fr->eeltype == eelRF_NEC)
-    {
-        icoul = enbcoulRF;
-    }
-    else 
-    {
-        icoul = enbcoulOOR;
+        case F_LJ14:
+        case F_LJC14_Q:
+            energygrp_elec = grppener->ener[egCOUL14];
+            energygrp_vdw  = grppener->ener[egLJ14];
+            break;
+        case F_LJC_PAIRS_NB:
+            energygrp_elec = grppener->ener[egCOULSR];
+            energygrp_vdw  = grppener->ener[egLJSR];
+            break;
+        default:
+            energygrp_elec = NULL; /* Keep compiler happy */
+            energygrp_vdw  = NULL; /* Keep compiler happy */
+            gmx_fatal(FARGS,"Unknown function type %d in do_nonbonded14",ftype);
+            break;
     }
     
-    if(fr->bvdwtab)
+    if(fr->efep != efepNO)
     {
-        ivdw = enbvdwTAB;
+        /* Lambda factor for state A=1-lambda and B=lambda */
+        LFC[0] = 1.0 - lambda[efptCOUL];
+        LFV[0] = 1.0 - lambda[efptVDW];
+        LFC[1] = lambda[efptCOUL];
+        LFV[1] = lambda[efptVDW];
+
+        /*derivative of the lambda factor for state A and B */
+        DLF[0] = -1;
+        DLF[1] = 1;
+
+        /* precalculate */
+        sigma2_def = pow(fr->sc_sigma6_def,1.0/3.0);
+        sigma2_min = pow(fr->sc_sigma6_min,1.0/3.0);
+
+        for (i=0;i<2;i++)
+        {
+            lfac_coul[i]  = (fr->sc_power==2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
+            dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power==2 ? (1-LFC[i]) : 1);
+            lfac_vdw[i]   = (fr->sc_power==2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
+            dlfac_vdw[i]  = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power==2 ? (1-LFV[i]) : 1);
+        }
     }
     else
     {
-        ivdw = enbvdwLJ;
+        sigma2_min = sigma2_def = 0;
     }
-    
-    
-    bCG = FALSE; /*Adres*/
-    /* We don't do SSE or altivec here, due to large overhead for 4-fold 
-     * unrolling on short lists 
-     */
-    
+
     bFreeEnergy = FALSE;
-    for(i=0; (i<nbonds); ) 
+    for(i=0; (i<nbonds); )
     {
         itype = iatoms[i++];
         ai    = iatoms[i++];
         aj    = iatoms[i++];
         gid   = GID(md->cENER[ai],md->cENER[aj],md->nenergrp);
         
-        if (!fr->adress_type == eAdressOff) {
-            if (fr->adress_group_explicit[md->cENER[ai]] != fr->adress_group_explicit[md->cENER[aj]]){
-                /*exclude cg-ex interaction*/
-                continue;
-            }           
-            bCG = !fr->adress_group_explicit[md->cENER[ai]];
-            wf14[0] = md->wf[ai];
-            wf14[1] = md->wf[aj];
-        }
+        /* Get parameters */
         switch (ftype) {
-        case F_LJ14:
-            bFreeEnergy =
+            case F_LJ14:
+                bFreeEnergy =
                 (fr->efep != efepNO &&
                  ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
                   iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
                   iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
-            chargeA[0] = md->chargeA[ai];
-            chargeA[1] = md->chargeA[aj];
-            nbfp = (real *)&(iparams[itype].lj14.c6A);
-            break;
-        case F_LJC14_Q:
-            eps = fr->epsfac*iparams[itype].ljc14.fqq;
-            chargeA[0] = iparams[itype].ljc14.qi;
-            chargeA[1] = iparams[itype].ljc14.qj;
-            nbfp = (real *)&(iparams[itype].ljc14.c6);
-            break;
-        case F_LJC_PAIRS_NB:
-            chargeA[0] = iparams[itype].ljcnb.qi;
-            chargeA[1] = iparams[itype].ljcnb.qj;
-            nbfp = (real *)&(iparams[itype].ljcnb.c6);
-            break;
+                qq               = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
+                c6               = iparams[itype].lj14.c6A;
+                c12              = iparams[itype].lj14.c12A;
+                break;
+            case F_LJC14_Q:
+                qq               = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
+                c6               = iparams[itype].ljc14.c6;
+                c12              = iparams[itype].ljc14.c12;
+                break;
+            case F_LJC_PAIRS_NB:
+                qq               = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
+                c6               = iparams[itype].ljcnb.c6;
+                c12              = iparams[itype].ljcnb.c12;
+                break;
+            default:
+                /* Cannot happen since we called gmx_fatal() above in this case */
+                qq = c6 = c12 = 0; /* Keep compiler happy */
+                break;
         }
         
-        if (!bMolPBC) 
+        /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
+         * included in the general nfbp array now. This means the tables are scaled down by the
+         * same factor, so when we use the original c6/c12 parameters from iparams[] they must
+         * be scaled up.
+         */
+        c6  *= 6.0;
+        c12 *= 12.0;
+        
+        /* Do we need to apply full periodic boundary conditions? */
+        if(fr->bMolPBC==TRUE)
         {
-            /* This is a bonded interaction, atoms are in the same box */
-            shift_f = CENTRAL;
-            r2 = distance2(x[ai],x[aj]);
+            fshift_index = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
         }
-        else 
+        else
         {
-            /* Apply full periodic boundary conditions */
-            shift_f = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
-            r2 = norm2(dx);
+            fshift_index = CENTRAL;
+            rvec_sub(x[ai],x[aj],dx);
         }
+        r2           = norm2(dx);
 
-        if (r2 >= rtab2) 
+        if(r2>=fr->tab14.r*fr->tab14.r)
         {
-            if (!bWarn) 
+            if(warned_rlimit==FALSE)
             {
-                fprintf(stderr,"Warning: 1-4 interaction between %d and %d "
-                        "at distance %.3f which is larger than the 1-4 table size %.3f nm\n", 
-                       glatnr(global_atom_index,ai),
-                       glatnr(global_atom_index,aj),
-                       sqrt(r2), sqrt(rtab2));
-                fprintf(stderr,"These are ignored for the rest of the simulation\n");
-                fprintf(stderr,"This usually means your system is exploding,\n"
-                        "if not, you should increase table-extension in your mdp file\n"
-                        "or with user tables increase the table size\n");
-                bWarn = TRUE;
+                nb_listed_warning_rlimit(x,ai,aj,global_atom_index,sqrt(r2),fr->tab14.r);
+                warned_rlimit=TRUE;
             }
-            if (debug) 
-             fprintf(debug,"%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
-                     x[ai][XX],x[ai][YY],x[ai][ZZ],
-                     x[aj][XX],x[aj][YY],x[aj][ZZ],
-                     glatnr(global_atom_index,ai),
-                     glatnr(global_atom_index,aj),
-                     sqrt(r2));
+            continue;
         }
-        else 
+
+        if (bFreeEnergy)
         {
-            copy_rvec(x[ai],x14[0]);
-            copy_rvec(x[aj],x14[1]);
-            clear_rvec(f14[0]);
-            clear_rvec(f14[1]);
-#ifdef DEBUG
-            fprintf(debug,"LJ14: grp-i=%2d, grp-j=%2d, ngrp=%2d, GID=%d\n",
-                    md->cENER[ai],md->cENER[aj],md->nenergrp,gid);
-#endif
-            
-           outeriter = inneriter = count = 0;
-           if (bFreeEnergy)
+            /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
+            qqB              = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
+            c6B              = iparams[itype].lj14.c6B*6.0;
+            c12B             = iparams[itype].lj14.c12B*12.0;
+
+            fscal            = nb_free_energy_evaluate_single(r2,fr->sc_r_power,fr->sc_alphacoul,fr->sc_alphavdw,
+                                                              fr->tab14.scale,fr->tab14.data,qq,c6,c12,qqB,c6B,c12B,
+                                                              LFC,LFV,DLF,lfac_coul,lfac_vdw,dlfac_coul,dlfac_vdw,
+                                                              fr->sc_sigma6_def,fr->sc_sigma6_min,sigma2_def,sigma2_min,&velec,&vvdw,dvdl);
+        }
+        else
         {
-            chargeB[0] = md->chargeB[ai];
-            chargeB[1] = md->chargeB[aj];
-            /* We pass &(iparams[itype].lj14.c6A) as LJ parameter matrix
-             * to the innerloops.
-             * Here we use that the LJ-14 parameters are stored in iparams
-             * as c6A,c12A,c6B,c12B, which are referenced correctly
-             * in the innerloops if we assign type combinations 0-0 and 0-1
-             * to atom pair ai-aj in topologies A and B respectively.
-             */
-
-            /* need to do a bit of a kludge here -- the way it is set up,
-               if the charges change, but the vdw do not, then even though bFreeEnergy is on,
-               it won't work, because all the bonds are perturbed.
-            */
-            if(ivdw==enbvdwBHAM)
-            {
-                gmx_fatal(FARGS,"Cannot do free energy Buckingham interactions.");
-            }
-            count = 0;
-            gmx_nb_free_energy_kernel(icoul,
-                                      ivdw,
-                                      i1,
-                                      &i0,
-                                      j_index,
-                                      &i1,
-                                      &shift_f,
-                                      fr->shift_vec[0],
-                                      fshift[0],
-                                      &gid,
-                                      x14[0],
-                                      f14[0],
-                                      chargeA,
-                                      chargeB,
-                                      eps,
-                                      krf,
-                                      crf,
-                                      fr->ewaldcoeff,
-                                      egcoul,
-                                      typeA,
-                                      typeB,
-                                      ntype,
-                                      nbfp,
-                                      egnb,
-                                      tabscale,
-                                      tab,
-                                      lambda[efptCOUL],
-                                      lambda[efptVDW],
-                                      dvdl,
-                                      fr->sc_alphacoul,
-                                      fr->sc_alphavdw,
-                                      fr->sc_power,
-                                      6.0, /* for 1-4's use the 6 power always - 48 power too high because of where they are forced to be */
-                                      fr->sc_sigma6_def,
-                                      fr->sc_sigma6_min,
-                                      TRUE,
-                                      &outeriter,
-                                      &inneriter);
+            /* Evaluate tabulated interaction without free energy */
+            fscal            = nb_evaluate_single(r2,fr->tab14.scale,fr->tab14.data,qq,c6,c12,&velec,&vvdw);
         }
-        else 
-        { 
-          if (fr->adress_type==eAdressOff || !fr->adress_do_hybridpairs){
-            /* Not perturbed - call kernel 330 */
-            nb_kernel330
-                ( &i1,
-                  &i0,
-                  j_index,
-                  &i1,
-                  &shift_f,
-                  fr->shift_vec[0],
-                  fshift[0],
-                  &gid,
-                  x14[0],
-                  f14[0],
-                  chargeA,
-                  &eps,
-                  &krf,
-                  &crf,
-                  egcoul,
-                  typeA,
-                  &ntype,
-                  nbfp,
-                  egnb,
-                  &tabscale,
-                  tab,
-                  NULL,
-                  NULL,
-                  NULL,
-                  NULL,
-                  &nthreads,
-                  &count,
-                  (void *)&mtx,
-                  &outeriter,
-                  &inneriter,
-                  NULL);                
-                } else {
-                    if (bCG) {
-                        nb_kernel330_adress_cg(&i1,
-                                &i0,
-                                j_index,
-                                &i1,
-                                &shift_f,
-                                fr->shift_vec[0],
-                                fshift[0],
-                                &gid,
-                                x14[0],
-                                f14[0],
-                                chargeA,
-                                &eps,
-                                &krf,
-                                &crf,
-                                egcoul,
-                                typeA,
-                                &ntype,
-                                nbfp,
-                                egnb,
-                                &tabscale,
-                                tab,
-                                NULL,
-                                NULL,
-                                NULL,
-                                NULL,
-                                &nthreads,
-                                &count,
-                                (void *) &mtx,
-                                &outeriter,
-                                &inneriter,
-                                fr->adress_ex_forcecap,
-                                wf14);
-                    } else {
-                        nb_kernel330_adress_ex(&i1,
-                                &i0,
-                                j_index,
-                                &i1,
-                                &shift_f,
-                                fr->shift_vec[0],
-                                fshift[0],
-                                &gid,
-                                x14[0],
-                                f14[0],
-                                chargeA,
-                                &eps,
-                                &krf,
-                                &crf,
-                                egcoul,
-                                typeA,
-                                &ntype,
-                                nbfp,
-                                egnb,
-                                &tabscale,
-                                tab,
-                                NULL,
-                                NULL,
-                                NULL,
-                                NULL,
-                                &nthreads,
-                                &count,
-                                (void *) &mtx,
-                                &outeriter,
-                                &inneriter,
-                                fr->adress_ex_forcecap,
-                                wf14);
-                    }
 
-                }
-            }
-        
+        energygrp_elec[gid]  += velec;
+        energygrp_vdw[gid]   += vvdw;
+        svmul(fscal,dx,dx);
+
         /* Add the forces */
-        rvec_inc(f[ai],f14[0]);
-        rvec_dec(f[aj],f14[0]);
-        
-        if (g) 
+        rvec_inc(f[ai],dx);
+        rvec_dec(f[aj],dx);
+
+        if (g)
         {
             /* Correct the shift forces using the graph */
-            ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);    
-            shift_vir = IVEC2IS(dt);
-            rvec_inc(fshift[shift_vir],f14[0]);
-            rvec_dec(fshift[CENTRAL],f14[0]);
+            ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
+            fshift_index = IVEC2IS(dt);
         }
-        
-           /* flops: eNR_KERNEL_OUTER + eNR_KERNEL330 + 12 */
+        if(fshift_index!=CENTRAL)
+        {
+            rvec_inc(fshift[fshift_index],dx);
+            rvec_dec(fshift[CENTRAL],dx);
         }
     }
     return 0.0;
diff --git a/src/gmxlib/nonbonded/preprocessor/gmxpreprocess.py b/src/gmxlib/nonbonded/preprocessor/gmxpreprocess.py
new file mode 100755 (executable)
index 0000000..46619c6
--- /dev/null
@@ -0,0 +1,963 @@
+#!/usr/bin/env python
+# Copyright (c) 2002-2008 ActiveState Software Inc.
+# License: MIT License (http://www.opensource.org/licenses/mit-license.php)
+# Original filename preprocess.py, see http://code.google.com/p/preprocess/
+#
+# Modified by Erik Lindahl 2009-2012 <lindahl@gromacs.org>
+# to enable advanced preprocessing for Gromacs kernels, including
+# preprocessor for-loops and substitution into preprocessor directives
+# as well as program strings.
+#
+# Please feel free to redistribute under same license as original (MIT),
+# but don't blame the original authors for mistakes in this version.
+#
+
+"""
+    Preprocess a file.
+
+    Command Line Usage:
+        gmxpreprocess [<options>...] <infile>
+
+    Options:
+        -h, --help      Print this help and exit.
+        -V, --version   Print the version info and exit.
+        -v, --verbose   Give verbose output for errors.
+
+        -o <outfile>    Write output to the given file instead of to stdout.
+        -f, --force     Overwrite given output file. (Otherwise an IOError
+                        will be raised if <outfile> already exists.
+        -D <define>     Define a variable for preprocessing. <define>
+                        can simply be a variable name (in which case it
+                        will be true) or it can be of the form
+                        <var>=<val>. An attempt will be made to convert
+                        <val> to an integer so "-D FOO=0" will create a
+                        false value.
+        -I <dir>        Add an directory to the include path for
+                        #include directives.
+
+        -k, --keep-lines    Emit empty lines for preprocessor statement
+                        lines and skipped output lines. This allows line
+                        numbers to stay constant.
+        -s, --no-substitute  Do NOT Substitute defines into emitted lines.
+        -c, --content-types-path <path>
+                        Specify a path to a content.types file to assist
+                        with filetype determination. See the
+                        `_gDefaultContentTypes` string in this file for
+                        details on its format.
+
+    Module Usage:
+        from gmxpreprocess import gmxpreprocess
+        gmxpreprocess(infile, outfile=sys.stdout, defines={}, force=0,
+                      keepLines=0, includePath=[], substitute=1,
+                      contentType=None)
+
+    The <infile> can be marked up with special preprocessor statement lines
+    of the form:
+        <comment-prefix> <preprocessor-statement> <comment-suffix>
+    where the <comment-prefix/suffix> are the native comment delimiters for
+    that file type.
+
+
+    Examples
+    --------
+
+    HTML (*.htm, *.html) or XML (*.xml, *.kpf, *.xul) files:
+
+        <!-- #if FOO -->
+        ...
+        <!-- #endif -->
+
+    Python (*.py), Perl (*.pl), Tcl (*.tcl), Ruby (*.rb), Bash (*.sh),
+    or make ([Mm]akefile*) files:
+
+        # #if defined('FAV_COLOR') and FAV_COLOR == "blue"
+        ...
+        # #elif FAV_COLOR == "red"
+        ...
+        # #else
+        ...
+        # #endif
+
+    C (*.c, *.h), C++ (*.cpp, *.cxx, *.cc, *.h, *.hpp, *.hxx, *.hh),
+    Java (*.java), PHP (*.php) or C# (*.cs) files:
+
+        // #define FAV_COLOR 'blue'
+        ...
+        /* #ifndef FAV_COLOR */
+        ...
+        // #endif
+
+    Fortran 77 (*.f) or 90/95 (*.f90) files:
+
+        C     #if COEFF == 'var'
+              ...
+        C     #endif
+
+    And other languages.
+
+
+    Preprocessor Syntax
+    -------------------
+
+    - Valid statements:
+        #define <var> [<value>]
+        #undef <var>
+        #ifdef <var>
+        #ifndef <var>
+        #if <expr>
+        #elif <expr>
+        #else
+        #endif
+        #error <error string>
+        #include "<file>"
+        #include <var>
+      where <expr> is any valid Python expression.
+    - The expression after #if/elif may be a Python statement. It is an
+      error to refer to a variable that has not been defined by a -D
+      option or by an in-content #define.
+    - Special built-in methods for expressions:
+        defined(varName)    Return true if given variable is defined.
+
+
+    Tips
+    ----
+
+    A suggested file naming convention is to let input files to
+    preprocess be of the form <basename>.p.<ext> and direct the output
+    of preprocess to <basename>.<ext>, e.g.:
+        preprocess -o foo.py foo.p.py
+    The advantage is that other tools (esp. editors) will still
+    recognize the unpreprocessed file as the original language.
+"""
+
+__version_info__ = (1, 1, 0)
+__version__ = '.'.join(map(str, __version_info__))
+
+import os
+import sys
+import getopt
+import types
+import re
+import pprint
+
+
+
+#---- exceptions
+
+class PreprocessError(Exception):
+    def __init__(self, errmsg, file=None, lineno=None, line=None):
+        self.errmsg = str(errmsg)
+        self.file = file
+        self.lineno = lineno
+        self.line = line
+        Exception.__init__(self, errmsg, file, lineno, line)
+    def __str__(self):
+        s = ""
+        if self.file is not None:
+            s += self.file + ":"
+        if self.lineno is not None:
+            s += str(self.lineno) + ":"
+        if self.file is not None or self.lineno is not None:
+            s += " "
+        s += self.errmsg
+        #if self.line is not None:
+        #    s += ": " + self.line
+        return s
+
+
+
+#---- global data
+
+# Comment delimiter info.
+#   A mapping of content type to a list of 2-tuples defining the line
+#   prefix and suffix for a comment. Each prefix or suffix can either
+#   be a string (in which case it is transformed into a pattern allowing
+#   whitespace on either side) or a compiled regex.
+_commentGroups = {
+    "Python":     [ ('#', '') ],
+    "Perl":       [ ('#', '') ],
+    "PHP":        [ ('/*', '*/'), ('//', ''), ('#', '') ],
+    "Ruby":       [ ('#', '') ],
+    "Tcl":        [ ('#', '') ],
+    "Shell":      [ ('#', '') ],
+    # Allowing for CSS and JavaScript comments in XML/HTML.
+    "XML":        [ ('<!--', '-->'), ('/*', '*/'), ('//', '') ],
+    "HTML":       [ ('<!--', '-->'), ('/*', '*/'), ('//', '') ],
+    "Makefile":   [ ('#', '') ],
+    "JavaScript": [ ('/*', '*/'), ('//', '') ],
+    "CSS":        [ ('/*', '*/') ],
+    "C":          [ ('/*', '*/') ],
+    "C++":        [ ('/*', '*/'), ('//', '') ],
+    "Java":       [ ('/*', '*/'), ('//', '') ],
+    "C#":         [ ('/*', '*/'), ('//', '') ],
+    "IDL":        [ ('/*', '*/'), ('//', '') ],
+    "Text":       [ ('#', '') ],
+    "Fortran":    [ (re.compile(r'^[a-zA-Z*$]\s*'), ''), ('!', '') ],
+    "TeX":        [ ('%', '') ],
+}
+
+
+
+#---- internal logging facility
+
+class _Logger:
+    DEBUG, INFO, WARN, ERROR, CRITICAL = range(5)
+    def __init__(self, name, level=None, streamOrFileName=sys.stderr):
+        self._name = name
+        if level is None:
+            self.level = self.WARN
+        else:
+            self.level = level
+        if type(streamOrFileName) == types.StringType:
+            self.stream = open(streamOrFileName, 'w')
+            self._opennedStream = 1
+        else:
+            self.stream = streamOrFileName
+            self._opennedStream = 0
+    def __del__(self):
+        if self._opennedStream:
+            self.stream.close()
+    def getLevel(self):
+        return self.level
+    def setLevel(self, level):
+        self.level = level
+    def _getLevelName(self, level):
+        levelNameMap = {
+            self.DEBUG: "DEBUG",
+            self.INFO: "INFO",
+            self.WARN: "WARN",
+            self.ERROR: "ERROR",
+            self.CRITICAL: "CRITICAL",
+        }
+        return levelNameMap[level]
+    def isEnabled(self, level):
+        return level >= self.level
+    def isDebugEnabled(self): return self.isEnabled(self.DEBUG)
+    def isInfoEnabled(self): return self.isEnabled(self.INFO)
+    def isWarnEnabled(self): return self.isEnabled(self.WARN)
+    def isErrorEnabled(self): return self.isEnabled(self.ERROR)
+    def isFatalEnabled(self): return self.isEnabled(self.FATAL)
+    def log(self, level, msg, *args):
+        if level < self.level:
+            return
+        message = "%s: %s: " % (self._name, self._getLevelName(level).lower())
+        message = message + (msg % args) + "\n"
+        self.stream.write(message)
+        self.stream.flush()
+    def debug(self, msg, *args):
+        self.log(self.DEBUG, msg, *args)
+    def info(self, msg, *args):
+        self.log(self.INFO, msg, *args)
+    def warn(self, msg, *args):
+        self.log(self.WARN, msg, *args)
+    def error(self, msg, *args):
+        self.log(self.ERROR, msg, *args)
+    def fatal(self, msg, *args):
+        self.log(self.CRITICAL, msg, *args)
+
+log = _Logger("gmxpreprocess", _Logger.WARN)
+
+
+
+#---- internal support stuff
+
+def SubstituteInternal(expr, defines):
+    prevexpr = ''
+    while (expr!=prevexpr):
+        prevexpr=expr
+        for name in reversed(sorted(defines, key=len)):
+            value = defines[name]
+            expr = expr.replace(name, str(value))
+    return expr
+
+def SubstituteInCode(expr, defines):
+    prevexpr = ''
+    while (expr!=prevexpr):
+        prevexpr=expr
+        for name in reversed(sorted(defines, key=len)):
+            value = defines[name]
+            expr = expr.replace('{' + name + '}', str(value))
+    return expr
+
+
+def _evaluate(expr, defines):
+    """Evaluate the given expression string with the given context.
+
+    WARNING: This runs eval() on a user string. This is unsafe.
+    """
+    #interpolated = _interpolate(s, defines)
+
+    try:
+        rv = eval(expr, {'defined':lambda v: v in defines}, defines)
+    except Exception, ex:
+        msg = str(ex)
+        if msg.startswith("name '") and msg.endswith("' is not defined"):
+            # A common error (at least this is presumed:) is to have
+            #   defined(FOO)   instead of   defined('FOO')
+            # We should give a little as to what might be wrong.
+            # msg == "name 'FOO' is not defined"  -->  varName == "FOO"
+            varName = msg[len("name '"):-len("' is not defined")]
+            if expr.find("defined(%s)" % varName) != -1:
+                # "defined(FOO)" in expr instead of "defined('FOO')"
+                msg += " (perhaps you want \"defined('%s')\" instead of "\
+                       "\"defined(%s)\")" % (varName, varName)
+        elif msg.startswith("invalid syntax"):
+            msg = "invalid syntax: '%s'" % expr
+        raise PreprocessError(msg, defines['__FILE__'], defines['__LINE__'])
+    log.debug("evaluate %r -> %s (defines=%r)", expr, rv, defines)
+
+    return rv
+
+#---- module API
+
+def gmxpreprocess(infile, outfile=sys.stdout, defines={},
+               force=0, keepLines=0, includePath=[], substitute=1,
+               contentType=None, contentTypesRegistry=None,
+               __preprocessedFiles=None):
+    """Preprocess the given file.
+
+    "infile" is the input path.
+    "outfile" is the output path or stream (default is sys.stdout).
+    "defines" is a dictionary of defined variables that will be
+        understood in preprocessor statements. Keys must be strings and,
+        currently, only the truth value of any key's value matters.
+    "force" will overwrite the given outfile if it already exists. Otherwise
+        an IOError will be raise if the outfile already exists.
+    "keepLines" will cause blank lines to be emitted for preprocessor lines
+        and content lines that would otherwise be skipped.
+    "includePath" is a list of directories to search for given #include
+        directives. The directory of the file being processed is presumed.
+    "substitute", if true, will allow substitution of defines into emitted
+        lines. (NOTE: This substitution will happen within program strings
+        as well. This may not be what you expect.)
+    "contentType" can be used to specify the content type of the input
+        file. It not given, it will be guessed.
+    "contentTypesRegistry" is an instance of ContentTypesRegistry. If not specified
+        a default registry will be created.
+    "__preprocessedFiles" (for internal use only) is used to ensure files
+        are not recusively preprocessed.
+
+    Returns the modified dictionary of defines or raises PreprocessError if
+    there was some problem.
+    """
+    if __preprocessedFiles is None:
+        __preprocessedFiles = []
+    log.info("preprocess(infile=%r, outfile=%r, defines=%r, force=%r, "\
+             "keepLines=%r, includePath=%r, contentType=%r, "\
+             "__preprocessedFiles=%r)", infile, outfile, defines, force,
+             keepLines, includePath, contentType, __preprocessedFiles)
+    absInfile = os.path.normpath(os.path.abspath(infile))
+    if absInfile in __preprocessedFiles:
+        raise PreprocessError("detected recursive #include of '%s'"\
+                              % infile)
+    __preprocessedFiles.append(os.path.abspath(infile))
+
+    # Determine the content type and comment info for the input file.
+    if contentType is None:
+        registry = contentTypesRegistry or getDefaultContentTypesRegistry()
+        contentType = registry.getContentType(infile)
+        if contentType is None:
+            contentType = "Text"
+            log.warn("defaulting content type for '%s' to '%s'",
+                     infile, contentType)
+    try:
+        cgs = _commentGroups[contentType]
+    except KeyError:
+        raise PreprocessError("don't know comment delimiters for content "\
+                              "type '%s' (file '%s')"\
+                              % (contentType, infile))
+
+    # Generate statement parsing regexes. Basic format:
+    #       <comment-prefix> <preprocessor-stmt> <comment-suffix>
+    #  Examples:
+    #       <!-- #if foo -->
+    #       ...
+    #       <!-- #endif -->
+    #
+    #       # #if BAR
+    #       ...
+    #       # #else
+    #       ...
+    #       # #endif
+    stmts = ['##\s*(?P<op>.*?)',
+             '#\s*(?P<op>if|elif|ifdef|ifndef)\s+(?P<expr>.*?)',
+             '#\s*(?P<op>else|endif)',
+             '#\s*(?P<op>error)\s+(?P<error>.*?)',
+             '#\s*(?P<op>define)\s+(?P<var>[^\s]*?)(\s+(?P<val>.+?))?',
+             '#\s*(?P<op>undef)\s+(?P<var>[^\s]*?)',
+             '#\s*(?P<op>for)\s+(?P<var>.*?)\s+((in)|(IN))\s+(?P<valuelist>.*?)',
+             '#\s*(?P<op>endfor)',
+             '#\s*(?P<op>include)\s+"(?P<fname>.*?)"',
+             r'#\s*(?P<op>include)\s+(?P<var>[^\s]+?)',
+            ]
+    patterns = []
+    for stmt in stmts:
+        # The comment group prefix and suffix can either be just a
+        # string or a compiled regex.
+        for cprefix, csuffix in cgs:
+            if hasattr(cprefix, "pattern"):
+                pattern = cprefix.pattern
+            else:
+                pattern = r"^\s*%s\s*" % re.escape(cprefix)
+            pattern += stmt
+            if hasattr(csuffix, "pattern"):
+                pattern += csuffix.pattern
+            else:
+                pattern += r"\s*%s\s*$" % re.escape(csuffix)
+            patterns.append(pattern)
+    stmtRes = [re.compile(p) for p in patterns]
+
+    # Process the input file.
+    # (Would be helpful if I knew anything about lexing and parsing
+    # simple grammars.)
+    fin = open(infile, 'r')
+    lines = fin.readlines()
+    # Merge multi-line comments
+    for i in range(len(lines)-1,-1,-1):
+        line = lines[i].rstrip(' \r\n')
+        if len(line)>0 and line[-1]=='\\':
+            lines[i] = line[:-1] + ' ' + lines[i+1]
+            lines[i+1] = ''    # keep an empty line to avoid screwing up line numbers
+
+    fin.close()
+    if type(outfile) in types.StringTypes:
+        if force and os.path.exists(outfile):
+            os.chmod(outfile, 0777)
+            os.remove(outfile)
+        fout = open(outfile, 'w')
+    else:
+        fout = outfile
+
+    defines['__FILE__'] = infile
+    SKIP, EMIT = range(2) # states
+    states = [(EMIT,   # a state is (<emit-or-skip-lines-in-this-section>,
+               0,      #             <have-emitted-in-this-if-block>,
+               0)]     #             <have-seen-'else'-in-this-if-block>)
+    lineNum = 0
+    nlines = len(lines)
+    forlevel = 0
+    forvar = {}
+    forvaluelist = {}
+    forstartline = {}
+    foriteration = {}
+    last_emitted_was_blank = True
+
+    while lineNum<nlines:
+
+        line = lines[lineNum]
+
+        log.debug("line %d: %r", lineNum+1, line)
+        defines['__LINE__'] = lineNum+1
+
+        # Is this line a preprocessor stmt line?
+        #XXX Could probably speed this up by optimizing common case of
+        #    line NOT being a preprocessor stmt line.
+        for stmtRe in stmtRes:
+            match = stmtRe.match(line)
+            if match:
+                break
+        else:
+            match = None
+
+        if match:
+
+            # Remove contents after ## (comment)
+            idx=line.find("##")
+            if(idx>0):
+                line = line[0:idx]
+
+            op = match.group("op")
+            log.debug("%r stmt (states: %r)", op, states)
+            if op == "define":
+                if not (states and states[-1][0] == SKIP):
+                    var, val = match.group("var", "val")
+                    val = SubstituteInternal(str(val), defines)
+
+                    if val is None:
+                        val = None
+                    else:
+                        try:
+                            val = eval(val, {}, {})
+                        except:
+                            pass
+                    defines[var] = val
+            elif op == "undef":
+                if not (states and states[-1][0] == SKIP):
+                    var = match.group("var")
+                    try:
+                        del defines[var]
+                    except KeyError:
+                        pass
+            elif op == "include":
+                if not (states and states[-1][0] == SKIP):
+                    if "var" in match.groupdict():
+                        # This is the second include form: #include VAR
+                        var = match.group("var")
+                        f = defines[var]
+                    else:
+                        # This is the first include form: #include "path"
+                        f = match.group("fname")
+
+                    for d in [os.path.dirname(infile)] + includePath:
+                        fname = os.path.normpath(os.path.join(d, f))
+                        if os.path.exists(fname):
+                            break
+                    else:
+                        raise PreprocessError("could not find #include'd file "\
+                                              "\"%s\" on include path: %r"\
+                                              % (f, includePath))
+                    defines = gmxpreprocess(fname, fout, defines, force,
+                                            keepLines, includePath, substitute,
+                                            contentTypesRegistry=contentTypesRegistry,
+                                            __preprocessedFiles=__preprocessedFiles)
+            elif op in ("if", "ifdef", "ifndef"):
+                if op == "if":
+                    expr = match.group("expr")
+                elif op == "ifdef":
+                    expr = "defined('%s')" % match.group("expr")
+                elif op == "ifndef":
+                    expr = "not defined('%s')" % match.group("expr")
+                try:
+                    if states and states[-1][0] == SKIP:
+                        # Were are nested in a SKIP-portion of an if-block.
+                        states.append((SKIP, 0, 0))
+                    elif _evaluate(expr, defines):
+                        states.append((EMIT, 1, 0))
+                    else:
+                        states.append((SKIP, 0, 0))
+                except KeyError:
+                    raise PreprocessError("use of undefined variable in "\
+                                          "#%s stmt" % op, defines['__FILE__'],
+                                          defines['__LINE__'], line)
+            elif op == "elif":
+                expr = match.group("expr")
+                try:
+                    if states[-1][2]: # already had #else in this if-block
+                        raise PreprocessError("illegal #elif after #else in "\
+                            "same #if block", defines['__FILE__'],
+                            defines['__LINE__'], line)
+                    elif states[-1][1]: # if have emitted in this if-block
+                        states[-1] = (SKIP, 1, 0)
+                    elif states[:-1] and states[-2][0] == SKIP:
+                        # Were are nested in a SKIP-portion of an if-block.
+                        states[-1] = (SKIP, 0, 0)
+                    elif _evaluate(expr, defines):
+                        states[-1] = (EMIT, 1, 0)
+                    else:
+                        states[-1] = (SKIP, 0, 0)
+                except IndexError:
+                    raise PreprocessError("#elif stmt without leading #if "\
+                                          "stmt", defines['__FILE__'],
+                                          defines['__LINE__'], line)
+            elif op == "else":
+                try:
+                    if states[-1][2]: # already had #else in this if-block
+                        raise PreprocessError("illegal #else after #else in "\
+                            "same #if block", defines['__FILE__'],
+                            defines['__LINE__'], line)
+                    elif states[-1][1]: # if have emitted in this if-block
+                        states[-1] = (SKIP, 1, 1)
+                    elif states[:-1] and states[-2][0] == SKIP:
+                        # Were are nested in a SKIP-portion of an if-block.
+                        states[-1] = (SKIP, 0, 1)
+                    else:
+                        states[-1] = (EMIT, 1, 1)
+                except IndexError:
+                    raise PreprocessError("#else stmt without leading #if "\
+                                          "stmt", defines['__FILE__'],
+                                          defines['__LINE__'], line)
+            elif op == "endif":
+                try:
+                    states.pop()
+                except IndexError:
+                    raise PreprocessError("#endif stmt without leading #if"\
+                                          "stmt", defines['__FILE__'],
+                                          defines['__LINE__'], line)
+            elif op == "for":
+
+                tmpstr     = match.group("var")
+                thisforvar = tmpstr.split(",")
+                for s in thisforvar:
+                    s.strip(" ")
+
+                # Thisforvar is now a _list_ if 1 or more for variables, without whitespace
+
+                # Evaluate the list-of-values just in case it refers to a list variable
+                valuelist = _evaluate(match.group("valuelist"),defines)
+                # If a string, evaluate it again
+                if(isinstance(valuelist,str)):
+                    valuelist = eval(valuelist)
+
+                forlevel += 1
+
+                forvar[forlevel]       = thisforvar
+                forvaluelist[forlevel] = valuelist
+                forstartline[forlevel] = lineNum + 1
+                foriteration[forlevel] = 0
+
+                if(len(valuelist)>0):
+                    # set the variable for this for-loop to the first value in the list for this level
+                    nvar=len(thisforvar)
+                    for i in range(nvar):
+                        if(nvar==1):
+                            val=valuelist[0]
+                        else:
+                            val=valuelist[0][i]
+                        defines[thisforvar[i]] = val
+
+                else:
+                    # list was empty, so skip this entire section
+                    states.append((SKIP, 0, 0))
+
+            elif op == "endfor":
+                foriteration[forlevel] += 1
+                # Should we do one more iteration on this level?
+                iter       = foriteration[forlevel]
+                thisforvar = forvar[forlevel]
+                valuelist  = forvaluelist[forlevel]
+
+                if(iter<len(valuelist)):
+
+                    nvar = len(thisforvar)
+                    for i in range(len(thisforvar)):
+                        if(nvar==1):
+                           val=valuelist[iter]
+                        else:
+                            val=valuelist[iter][i]
+                        defines[thisforvar[i]] = val
+
+                    lineNum             = forstartline[forlevel]
+                    continue
+                else:
+                    forlevel -= 1
+                    if(len(valuelist)==0):
+                        states.pop()
+
+            elif op == "error":
+                if not (states and states[-1][0] == SKIP):
+                    error = match.group("error")
+                    raise PreprocessError("#error: "+error, defines['__FILE__'],
+                                          defines['__LINE__'], line)
+            log.debug("states: %r", states)
+            if keepLines:
+                fout.write("\n")
+        else:
+            try:
+                if states[-1][0] == EMIT:
+                    log.debug("emit line (%s)" % states[-1][1])
+                    # Substitute all defines into line.
+                    # XXX Should avoid recursive substitutions. But that
+                    #     would be a pain right now.
+
+                    sline = line
+                    if substitute:
+                        sline = SubstituteInCode(sline,defines)
+
+                    emitted_line_is_blank = (sline.strip()=='')
+                    if( not (emitted_line_is_blank and last_emitted_was_blank) and not keepLines):
+                        fout.write(sline)
+                        last_emitted_was_blank = emitted_line_is_blank
+
+                elif keepLines:
+                    log.debug("keep blank line (%s)" % states[-1][1])
+                    fout.write("\n")
+                else:
+                    log.debug("skip line (%s)" % states[-1][1])
+            except IndexError:
+                raise PreprocessError("superfluous #endif before this line",
+                                      defines['__FILE__'],
+                                      defines['__LINE__'])
+        lineNum += 1
+
+    if len(states) > 1:
+        raise PreprocessError("unterminated #if block", defines['__FILE__'],
+                              defines['__LINE__'])
+    elif len(states) < 1:
+        raise PreprocessError("superfluous #endif on or before this line",
+                              defines['__FILE__'], defines['__LINE__'])
+
+    if fout != outfile:
+        fout.close()
+
+    return defines
+
+
+#---- content-type handling
+
+_gDefaultContentTypes = """
+    # Default file types understood by "gmxpreprocess.py".
+    #
+    # Format is an extension of 'mime.types' file syntax.
+    #   - '#' indicates a comment to the end of the line.
+    #   - a line is:
+    #       <filetype> [<pattern>...]
+    #     where,
+    #       <filetype>'s are equivalent in spirit to the names used in the Windows
+    #           registry in HKCR, but some of those names suck or are inconsistent;
+    #           and
+    #       <pattern> is a suffix (pattern starts with a '.'), a regular expression
+    #           (pattern is enclosed in '/' characters), a full filename (anything
+    #           else).
+    #
+    # Notes on case-sensitivity:
+    #
+    # A suffix pattern is case-insensitive on Windows and case-sensitive
+    # elsewhere.  A filename pattern is case-sensitive everywhere. A regex
+    # pattern's case-sensitivity is defined by the regex. This means it is by
+    # default case-sensitive, but this can be changed using Python's inline
+    # regex option syntax. E.g.:
+    #         Makefile            /^(?i)makefile.*$/   # case-INsensitive regex
+
+    Python              .py
+    Python              .pyw
+    Perl                .pl
+    Ruby                .rb
+    Tcl                 .tcl
+    XML                 .xml
+    XML                 .kpf
+    XML                 .xul
+    XML                 .rdf
+    XML                 .xslt
+    XML                 .xsl
+    XML                 .wxs
+    XML                 .wxi
+    HTML                .htm
+    HTML                .html
+    XML                 .xhtml
+    Makefile            /^[Mm]akefile.*$/
+    PHP                 .php
+    JavaScript          .js
+    CSS                 .css
+    C++                 .c       # C++ because then we can use //-style comments
+    C++                 .cpp
+    C++                 .cxx
+    C++                 .cc
+    C++                 .h
+    C++                 .hpp
+    C++                 .hxx
+    C++                 .hh
+    C++                 .gpp     # Gromacs pre-preprocessing
+    IDL                 .idl
+    Text                .txt
+    Fortran             .f
+    Fortran             .f90
+    Shell               .sh
+    Shell               .csh
+    Shell               .ksh
+    Shell               .zsh
+    Java                .java
+    C#                  .cs
+    TeX                 .tex
+
+    # Some Komodo-specific file extensions
+    Python              .ksf  # Fonts & Colors scheme files
+    Text                .kkf  # Keybinding schemes files
+"""
+
+class ContentTypesRegistry:
+    """A class that handles determining the filetype of a given path.
+
+    Usage:
+        >>> registry = ContentTypesRegistry()
+        >>> registry.getContentType("foo.py")
+        "Python"
+    """
+
+    def __init__(self, contentTypesPaths=None):
+        self.contentTypesPaths = contentTypesPaths
+        self._load()
+
+    def _load(self):
+        from os.path import dirname, join, exists
+
+        self.suffixMap = {}
+        self.regexMap = {}
+        self.filenameMap = {}
+
+        self._loadContentType(_gDefaultContentTypes)
+        localContentTypesPath = join(dirname(__file__), "content.types")
+        if exists(localContentTypesPath):
+            log.debug("load content types file: `%r'" % localContentTypesPath)
+            self._loadContentType(open(localContentTypesPath, 'r').read())
+        for path in (self.contentTypesPaths or []):
+            log.debug("load content types file: `%r'" % path)
+            self._loadContentType(open(path, 'r').read())
+
+    def _loadContentType(self, content, path=None):
+        """Return the registry for the given content.types file.
+
+        The registry is three mappings:
+            <suffix> -> <content type>
+            <regex> -> <content type>
+            <filename> -> <content type>
+        """
+        for line in content.splitlines(0):
+            words = line.strip().split()
+            for i in range(len(words)):
+                if words[i][0] == '#':
+                    del words[i:]
+                    break
+            if not words: continue
+            contentType, patterns = words[0], words[1:]
+            if not patterns:
+                if line[-1] == '\n': line = line[:-1]
+                raise PreprocessError("bogus content.types line, there must "\
+                                      "be one or more patterns: '%s'" % line)
+            for pattern in patterns:
+                if pattern.startswith('.'):
+                    if sys.platform.startswith("win"):
+                        # Suffix patterns are case-insensitive on Windows.
+                        pattern = pattern.lower()
+                    self.suffixMap[pattern] = contentType
+                elif pattern.startswith('/') and pattern.endswith('/'):
+                    self.regexMap[re.compile(pattern[1:-1])] = contentType
+                else:
+                    self.filenameMap[pattern] = contentType
+
+    def getContentType(self, path):
+        """Return a content type for the given path.
+
+        @param path {str} The path of file for which to guess the
+            content type.
+        @returns {str|None} Returns None if could not determine the
+            content type.
+        """
+        basename = os.path.basename(path)
+        contentType = None
+        # Try to determine from the path.
+        if not contentType and self.filenameMap.has_key(basename):
+            contentType = self.filenameMap[basename]
+            log.debug("Content type of '%s' is '%s' (determined from full "\
+                      "path).", path, contentType)
+        # Try to determine from the suffix.
+        if not contentType and '.' in basename:
+            suffix = "." + basename.split(".")[-1]
+            if sys.platform.startswith("win"):
+                # Suffix patterns are case-insensitive on Windows.
+                suffix = suffix.lower()
+            if self.suffixMap.has_key(suffix):
+                contentType = self.suffixMap[suffix]
+                log.debug("Content type of '%s' is '%s' (determined from "\
+                          "suffix '%s').", path, contentType, suffix)
+        # Try to determine from the registered set of regex patterns.
+        if not contentType:
+            for regex, ctype in self.regexMap.items():
+                if regex.search(basename):
+                    contentType = ctype
+                    log.debug("Content type of '%s' is '%s' (matches regex '%s')",
+                              path, contentType, regex.pattern)
+                    break
+        # Try to determine from the file contents.
+        content = open(path, 'rb').read()
+        if content.startswith("<?xml"):  # cheap XML sniffing
+            contentType = "XML"
+        return contentType
+
+_gDefaultContentTypesRegistry = None
+def getDefaultContentTypesRegistry():
+    global _gDefaultContentTypesRegistry
+    if _gDefaultContentTypesRegistry is None:
+        _gDefaultContentTypesRegistry = ContentTypesRegistry()
+    return _gDefaultContentTypesRegistry
+
+
+#---- internal support stuff
+#TODO: move other internal stuff down to this section
+
+try:
+    reversed
+except NameError:
+    # 'reversed' added in Python 2.4 (http://www.python.org/doc/2.4/whatsnew/node7.html)
+    def reversed(seq):
+        rseq = list(seq)
+        rseq.reverse()
+        for item in rseq:
+            yield item
+try:
+    sorted
+except NameError:
+    # 'sorted' added in Python 2.4. Note that I'm only implementing enough
+    # of sorted as is used in this module.
+    def sorted(seq, key=None):
+        identity = lambda x: x
+        key_func = (key or identity)
+        sseq = list(seq)
+        sseq.sort(lambda self, other: cmp(key_func(self), key_func(other)))
+        for item in sseq:
+            yield item
+
+
+#---- mainline
+
+def main(argv):
+    try:
+        optlist, args = getopt.getopt(argv[1:], 'hVvo:D:fkI:sc:',
+            ['help', 'version', 'verbose', 'force', 'keep-lines',
+             'no-substitute', 'content-types-path='])
+    except getopt.GetoptError, msg:
+        sys.stderr.write("gmxpreprocess: error: %s. Your invocation was: %s\n"\
+                         % (msg, argv))
+        sys.stderr.write("See 'gmxpreprocess --help'.\n")
+        return 1
+    outfile = sys.stdout
+    defines = {}
+    force = 0
+    keepLines = 0
+    substitute = 1
+    includePath = []
+    contentTypesPaths = []
+    for opt, optarg in optlist:
+        if opt in ('-h', '--help'):
+            sys.stdout.write(__doc__)
+            return 0
+        elif opt in ('-V', '--version'):
+            sys.stdout.write("gmxpreprocess %s\n" % __version__)
+            return 0
+        elif opt in ('-v', '--verbose'):
+            log.setLevel(log.DEBUG)
+        elif opt == '-o':
+            outfile = optarg
+        if opt in ('-f', '--force'):
+            force = 1
+        elif opt == '-D':
+            if optarg.find('=') != -1:
+                var, val = optarg.split('=', 1)
+                try:
+                    val = int(val)
+                except ValueError:
+                    pass
+            else:
+                var, val = optarg, None
+            defines[var] = val
+        elif opt in ('-k', '--keep-lines'):
+            keepLines = 1
+        elif opt == '-I':
+            includePath.append(optarg)
+        elif opt in ('-s', '--no-substitute'):
+            substitute = 0
+        elif opt in ('-c', '--content-types-path'):
+            contentTypesPaths.append(optarg)
+
+    if len(args) != 1:
+        sys.stderr.write("gmxpreprocess: error: incorrect number of "\
+                         "arguments: argv=%r\n" % argv)
+        return 1
+    else:
+        infile = args[0]
+
+    try:
+        contentTypesRegistry = ContentTypesRegistry(contentTypesPaths)
+        gmxpreprocess(infile, outfile, defines, force, keepLines, includePath,
+                   substitute, contentTypesRegistry=contentTypesRegistry)
+    except PreprocessError, ex:
+        if log.isDebugEnabled():
+            import traceback
+            traceback.print_exc(file=sys.stderr)
+        else:
+            sys.stderr.write("gmxpreprocess: error: %s\n" % str(ex))
+        return 1
+
+if __name__ == "__main__":
+    __file__ = sys.argv[0]
+    sys.exit( main(sys.argv) )
+
index 5bc7155ae7ad07418c390c5d5dbbf8758b796c1a..3a3e5d5e226144ca3894148d3b313b4e6bdc6ba5 100644 (file)
 #include "smalloc.h"
 #include "copyrite.h"
 
+
+
+
+
 typedef struct {
   const char *name;
   int  flop;
@@ -54,142 +58,37 @@ typedef struct {
 
 
 static const t_nrnb_data nbdata[eNRNB] = {
-    { "LJ",                             33 }, /* nb_kernel010 */
-    { "Buckingham",                     61 }, /* nb_kernel020 */ 
-    { "VdW(T)",                         54 }, /* nb_kernel030 */
-    { "Coulomb",                        27 }, /* nb_kernel100 */
-    { "Coulomb [W3]",                   80 }, /* nb_kernel101 */
-    { "Coulomb [W3-W3]",               234 }, /* nb_kernel102 */
-    { "Coulomb [W4]",                   80 }, /* nb_kernel103 */
-    { "Coulomb [W4-W4]",               234 }, /* nb_kernel104 */
-    { "Coulomb + LJ",                   38 }, /* nb_kernel110 */
-    { "Coulomb + LJ [W3]",              91 }, /* nb_kernel111 */
-    { "Coulomb + LJ [W3-W3]",          245 }, /* nb_kernel112 */
-    { "Coulomb + LJ [W4]",             113 }, /* nb_kernel113 */
-    { "Coulomb + LJ [W4-W4]",          267 }, /* nb_kernel114 */
-    { "Coulomb + Bham ",                64 }, /* nb_kernel120 */
-    { "Coulomb + Bham [W3]",           117 }, /* nb_kernel121 */
-    { "Coulomb + Bham [W3-W3]",        271 }, /* nb_kernel122 */
-    { "Coulomb + Bham [W4]",           141 }, /* nb_kernel123 */
-    { "Coulomb + Bham [W4-W4]",        295 }, /* nb_kernel124 */
-    { "Coulomb + VdW(T) ",              59 }, /* nb_kernel130 */
-    { "Coulomb + VdW(T) [W3]",         112 }, /* nb_kernel131 */
-    { "Coulomb + VdW(T) [W3-W3]",      266 }, /* nb_kernel132 */
-    { "Coulomb + VdW(T) [W4]",         134 }, /* nb_kernel133 */
-    { "Coulomb + VdW(T) [W4-W4]",      288 }, /* nb_kernel134 */
-    { "RF Coul",                        33 }, /* nb_kernel200 */
-    { "RF Coul [W3]",                   98 }, /* nb_kernel201 */
-    { "RF Coul [W3-W3]",               288 }, /* nb_kernel202 */
-    { "RF Coul [W4]",                   98 }, /* nb_kernel203 */
-    { "RF Coul [W4-W4]",               288 }, /* nb_kernel204 */
-    { "RF Coul + LJ",                   44 }, /* nb_kernel210 */
-    { "RF Coul + LJ [W3]",             109 }, /* nb_kernel211 */
-    { "RF Coul + LJ [W3-W3]",          299 }, /* nb_kernel212 */
-    { "RF Coul + LJ [W4]",             131 }, /* nb_kernel213 */
-    { "RF Coul + LJ [W4-W4]",          321 }, /* nb_kernel214 */
-    { "RF Coul + Bham ",                70 }, /* nb_kernel220 */
-    { "RF Coul + Bham [W3]",           135 }, /* nb_kernel221 */
-    { "RF Coul + Bham [W3-W3]",        325 }, /* nb_kernel222 */
-    { "RF Coul + Bham [W4]",           159 }, /* nb_kernel223 */
-    { "RF Coul + Bham [W4-W4]",        349 }, /* nb_kernel224 */
-    { "RF Coul + VdW(T) ",              65 }, /* nb_kernel230 */
-    { "RF Coul + VdW(T) [W3]",         130 }, /* nb_kernel231 */
-    { "RF Coul + VdW(T) [W3-W3]",      320 }, /* nb_kernel232 */
-    { "RF Coul + VdW(T) [W4]",         152 }, /* nb_kernel233 */
-    { "RF Coul + VdW(T) [W4-W4]",      342 }, /* nb_kernel234 */
-    { "Coul(T)",                        42 }, /* nb_kernel300 */
-    { "Coul(T) [W3]",                  125 }, /* nb_kernel301 */
-    { "Coul(T) [W3-W3]",               369 }, /* nb_kernel302 */
-    { "Coul(T) [W4]",                  125 }, /* nb_kernel303 */
-    { "Coul(T) [W4-W4]",               369 }, /* nb_kernel304 */
-    { "Coul(T) + LJ",                   55 }, /* nb_kernel310 */
-    { "Coul(T) + LJ [W3]",             138 }, /* nb_kernel311 */
-    { "Coul(T) + LJ [W3-W3]",          382 }, /* nb_kernel312 */
-    { "Coul(T) + LJ [W4]",             158 }, /* nb_kernel313 */
-    { "Coul(T) + LJ [W4-W4]",          402 }, /* nb_kernel314 */
-    { "Coul(T) + Bham",                 81 }, /* nb_kernel320 */
-    { "Coul(T) + Bham [W3]",           164 }, /* nb_kernel321 */
-    { "Coul(T) + Bham [W3-W3]",        408 }, /* nb_kernel322 */
-    { "Coul(T) + Bham [W4]",           186 }, /* nb_kernel323 */
-    { "Coul(T) + Bham [W4-W4]",        430 }, /* nb_kernel324 */
-    { "Coul(T) + VdW(T)",               68 }, /* nb_kernel330 */
-    { "Coul(T) + VdW(T) [W3]",         151 }, /* nb_kernel331 */
-    { "Coul(T) + VdW(T) [W3-W3]",      395 }, /* nb_kernel332 */
-    { "Coul(T) + VdW(T) [W4]",         179 }, /* nb_kernel333 */
-    { "Coul(T) + VdW(T) [W4-W4]",      423 }, /* nb_kernel334 */
-    { "Generalized Born Coulomb",       48 }, /* nb_kernel400 */
-    { "GB Coulomb + LJ",                61 }, /* nb_kernel410 */
-    { "GB Coulomb + VdW(T)",            79 }, /* nb_kernel430 */
-    { "LJ NF",                          19 }, /* nb_kernel010nf */
-    { "Buckingham NF",                  48 }, /* nb_kernel020nf */ 
-    { "VdW(T) NF",                      33 }, /* nb_kernel030nf */
-    { "Coulomb NF",                     16 }, /* nb_kernel100nf */
-    { "Coulomb [W3] NF",                47 }, /* nb_kernel101nf */
-    { "Coulomb [W3-W3] NF",            135 }, /* nb_kernel102nf */
-    { "Coulomb [W4] NF",                47 }, /* nb_kernel103nf */
-    { "Coulomb [W4-W4] NF",            135 }, /* nb_kernel104nf */
-    { "Coulomb + LJ NF",                24 }, /* nb_kernel110nf */
-    { "Coulomb + LJ [W3] NF",           55 }, /* nb_kernel111nf */
-    { "Coulomb + LJ [W3-W3] NF",       143 }, /* nb_kernel112nf */
-    { "Coulomb + LJ [W4] NF",           66 }, /* nb_kernel113nf */
-    { "Coulomb + LJ [W4-W4] NF",       154 }, /* nb_kernel114nf */
-    { "Coulomb + Bham  NF",             51 }, /* nb_kernel120nf */
-    { "Coulomb + Bham [W3] NF",         82 }, /* nb_kernel121nf */
-    { "Coulomb + Bham [W3-W3] NF",     170 }, /* nb_kernel122nf */
-    { "Coulomb + Bham [W4] NF",         95 }, /* nb_kernel123nf */
-    { "Coulomb + Bham [W4-W4] NF",     183 }, /* nb_kernel124nf */
-    { "Coulomb + VdW(T)  NF",           36 }, /* nb_kernel130nf */
-    { "Coulomb + VdW(T) [W3] NF",       67 }, /* nb_kernel131nf */
-    { "Coulomb + VdW(T) [W3-W3] NF",   155 }, /* nb_kernel132nf */
-    { "Coulomb + VdW(T) [W4] NF",       80 }, /* nb_kernel133nf */
-    { "Coulomb + VdW(T) [W4-W4] NF",   168 }, /* nb_kernel134nf */
-    { "RF Coul NF",                     19 }, /* nb_kernel200nf */
-    { "RF Coul [W3] NF",                56 }, /* nb_kernel201nf */
-    { "RF Coul [W3-W3] NF",            162 }, /* nb_kernel202nf */
-    { "RF Coul [W4] NF",                56 }, /* nb_kernel203nf */
-    { "RF Coul [W4-W4] NF",            162 }, /* nb_kernel204nf */
-    { "RF Coul + LJ NF",                27 }, /* nb_kernel210nf */
-    { "RF Coul + LJ [W3] NF",           64 }, /* nb_kernel211nf */
-    { "RF Coul + LJ [W3-W3] NF",       170 }, /* nb_kernel212nf */
-    { "RF Coul + LJ [W4] NF",           75 }, /* nb_kernel213nf */
-    { "RF Coul + LJ [W4-W4] NF",       181 }, /* nb_kernel214nf */
-    { "RF Coul + Bham  NF",             54 }, /* nb_kernel220nf */
-    { "RF Coul + Bham [W3] NF",         91 }, /* nb_kernel221nf */
-    { "RF Coul + Bham [W3-W3] NF",     197 }, /* nb_kernel222nf */
-    { "RF Coul + Bham [W4] NF",        104 }, /* nb_kernel223nf */
-    { "RF Coul + Bham [W4-W4] NF",     210 }, /* nb_kernel224nf */
-    { "RF Coul + VdW(T)  NF",           39 }, /* nb_kernel230nf */
-    { "RF Coul + VdW(T) [W3] NF",       76 }, /* nb_kernel231nf */
-    { "RF Coul + VdW(T) [W3-W3] NF",   182 }, /* nb_kernel232nf */
-    { "RF Coul + VdW(T) [W4] NF",       89 }, /* nb_kernel233nf */
-    { "RF Coul + VdW(T) [W4-W4] NF",   195 }, /* nb_kernel234nf */
-    { "Coul(T) NF",                     26 }, /* nb_kernel300nf */
-    { "Coul(T) [W3] NF",                77 }, /* nb_kernel301nf */
-    { "Coul(T) [W3-W3] NF",            225 }, /* nb_kernel302nf */
-    { "Coul(T) [W4] NF",                77 }, /* nb_kernel303nf */
-    { "Coul(T) [W4-W4] NF",            225 }, /* nb_kernel304nf */
-    { "Coul(T) + LJ NF",                34 }, /* nb_kernel310nf */
-    { "Coul(T) + LJ [W3] NF",           85 }, /* nb_kernel311nf */
-    { "Coul(T) + LJ [W3-W3] NF",       233 }, /* nb_kernel312nf */
-    { "Coul(T) + LJ [W4] NF",           96 }, /* nb_kernel313nf */
-    { "Coul(T) + LJ [W4-W4] NF",       244 }, /* nb_kernel314nf */
-    { "Coul(T) + Bham NF",              61 }, /* nb_kernel320nf */
-    { "Coul(T) + Bham [W3] NF",        112 }, /* nb_kernel321nf */
-    { "Coul(T) + Bham [W3-W3] NF",     260 }, /* nb_kernel322nf */
-    { "Coul(T) + Bham [W4] NF",        125 }, /* nb_kernel323nf */
-    { "Coul(T) + Bham [W4-W4] NF",     273 }, /* nb_kernel324nf */
-    { "Coul(T) + VdW(T) NF",            42 }, /* nb_kernel330nf */
-    { "Coul(T) + VdW(T) [W3] NF",       93 }, /* nb_kernel331nf */
-    { "Coul(T) + VdW(T) [W3-W3] NF",   241 }, /* nb_kernel332nf */
-    { "Coul(T) + VdW(T) [W4] NF",      110 }, /* nb_kernel333nf */
-    { "Coul(T) + VdW(T) [W4-W4] NF",   258 }, /* nb_kernel334nf */
-    { "Generalized Born Coulomb NF",    29 }, /* nb_kernel400nf */
-    { "GB Coulomb + LJ NF",             37 }, /* nb_kernel410nf */
-    { "GB Coulomb + VdW(T) NF",         49 }, /* nb_kernel430nf */
-    { "Free energy innerloop",         150 }, /* free energy, estimate */  
-    { "All-vs-All, Coul + LJ",          38 },
-    { "All-vs-All, GB + LJ",            61 },
-    { "Outer nonbonded loop",           10 },
+    /* These are re-used for different NB kernels, since there are so many.
+     * The actual number of flops is set dynamically.
+     */
+    { "NB VdW [V&F]",                    1 },
+    { "NB VdW [F]",                      1 },
+    { "NB Elec. [V&F]",                  1 },
+    { "NB Elec. [F]",                    1 },
+    { "NB Elec. [W3,V&F]",               1 },
+    { "NB Elec. [W3,F]",                 1 },
+    { "NB Elec. [W3-W3,V&F]",            1 },
+    { "NB Elec. [W3-W3,F]",              1 },
+    { "NB Elec. [W4,V&F]",               1 },
+    { "NB Elec. [W4,F]",                 1 },
+    { "NB Elec. [W4-W4,V&F]",            1 },
+    { "NB Elec. [W4-W4,F]",              1 },
+    { "NB VdW & Elec. [V&F]",            1 },
+    { "NB VdW & Elec. [F]",              1 },
+    { "NB VdW & Elec. [W3,V&F]",         1 },
+    { "NB VdW & Elec. [W3,F]",           1 },
+    { "NB VdW & Elec. [W3-W3,V&F]",      1 },
+    { "NB VdW & Elec. [W3-W3,F]",        1 },
+    { "NB VdW & Elec. [W4,V&F]",         1 },
+    { "NB VdW & Elec. [W4,F]",           1 },
+    { "NB VdW & Elec. [W4-W4,V&F]",      1 },
+    { "NB VdW & Elec. [W4-W4,F]",        1 },
+    
+    { "NB Generic kernel",               1 },
+    { "NB Free energy kernel",           1 },
+    { "NB All-vs-all",                   1 },
+    { "NB All-vs-all, GB",               1 },
+
     { "Pair Search distance check",      9 }, /* nbnxn pair dist. check */
     /* nbnxn kernel flops are based on inner-loops without exclusion checks.
      * Plain Coulomb runs through the RF kernels, except with CUDA.
@@ -201,23 +100,23 @@ static const t_nrnb_data nbdata[eNRNB] = {
      * - GPU always does exclusions, which requires 2-4 flops, but as invsqrt
      *   is always counted as 6 flops, this roughly compensates.
      */
-    { "LJ + Coulomb RF (F)",            38 }, /* nbnxn kernel LJ+RF, no ener */
-    { "LJ + Coulomb RF (F+E)",          54 },
-    { "LJ + Coulomb tabulated (F)",     41 }, /* nbnxn kernel LJ+tab, no en */
-    { "LJ + Coulomb tabulated (F+E)",   59 },
-    { "LJ (F)",                         33 }, /* nbnxn kernel LJ, no ener */
-    { "LJ (F+E)",                       43 },
-    { "Coulomb RF (F)",                 31 }, /* nbnxn kernel RF, no ener */
-    { "Coulomb RF (F+E)",               36 },
-    { "Coulomb tabulated (F)",          34 }, /* nbnxn kernel tab, no ener */
-    { "Coulomb tabulated (F+E)",        41 },
+    { "NxN RF Elec. + VdW [F]",         38 }, /* nbnxn kernel LJ+RF, no ener */
+    { "NxN RF Elec. + VdW [V&F]",       54 },
+    { "NxN CSTab Elec. + VdW [F]",      41 }, /* nbnxn kernel LJ+tab, no en */
+    { "NxN CSTab Elec. + VdW [V&F]",    59 },
+    { "NxN VdW [F]",                    33 }, /* nbnxn kernel LJ, no ener */
+    { "NxN VdW [V&F]",                  43 },
+    { "NxN RF Electrostatics [F]",      31 }, /* nbnxn kernel RF, no ener */
+    { "NxN RF Electrostatics [V&F]",    36 },
+    { "NxN CSTab Elec. [F]",            34 }, /* nbnxn kernel tab, no ener */
+    { "NxN CSTab Elec. [V&F]",          41 },
     { "1,4 nonbonded interactions",     90 },
     { "Born radii (Still)",             47 },
     { "Born radii (HCT/OBC)",          183 },
     { "Born force chain rule",          15 },
-    { "All-vs-All Still radii",         47 },
-    { "All-vs-All HCT/OBC radii",      183 },
-    { "All-vs-All Born chain rule",     15 },
+    { "All-vs-All Still radii",          1 },
+    { "All-vs-All HCT/OBC radii",        1 },
+    { "All-vs-All Born chain rule",      1 },
     { "Calc Weights",                   36 },
     { "Spread Q",                        6 },
     { "Spread Q Bspline",                2 }, 
@@ -333,7 +232,7 @@ void print_flop(FILE *out,t_nrnb *nrnb,double *nbfs,double *mflop)
   const char   *myline = "-----------------------------------------------------------------------------";
   
   *nbfs = 0.0;
-  for(i=0; (i<eNR_NBKERNEL_NR); i++) {
+  for(i=0; (i<eNR_NBKERNEL_ALLVSALLGB); i++) {
     if (strstr(nbdata[i].name,"W3-W3") != NULL)
       *nbfs += 9e-6*nrnb->n[i];
     else if (strstr(nbdata[i].name,"W3") != NULL)
@@ -357,14 +256,16 @@ void print_flop(FILE *out,t_nrnb *nrnb,double *nbfs,double *mflop)
     fprintf(out,"\n\tM E G A - F L O P S   A C C O U N T I N G\n\n");
   }
 
-  if (out) {
-    fprintf(out,"   RF=Reaction-Field  FE=Free Energy  SCFE=Soft-Core/Free Energy\n");
-    fprintf(out,"   T=Tabulated        W3=SPC/TIP3p    W4=TIP4p (single or pairs)\n");
-    fprintf(out,"   NF=No Forces\n\n");
-    
-    fprintf(out," %-32s %16s %15s  %7s\n",
-           "Computing:","M-Number","M-Flops","% Flops");
-    fprintf(out,"%s\n",myline);
+  if (out)
+  {
+      fprintf(out," NB=Group-cutoff nonbonded kernels    NxN=N-by-N tile Verlet kernels\n");
+      fprintf(out," RF=Reaction-Field  VdW=Van der Waals  CSTab=Cubic-spline table\n");
+      fprintf(out," W3=SPC/TIP3p  W4=TIP4p (single or pairs)\n");
+      fprintf(out," V&F=Potential and force  V=Potential only  F=Force only\n\n");
+
+      fprintf(out," %-32s %16s %15s  %7s\n",
+              "Computing:","M-Number","M-Flops","% Flops");
+      fprintf(out,"%s\n",myline);
   }
   *mflop=0.0;
   tfrac=0.0;
@@ -462,7 +363,7 @@ const char *nrnb_str(int enr)
 static const int    force_index[]={ 
   eNR_BONDS,  eNR_ANGLES,  eNR_PROPER, eNR_IMPROPER, 
   eNR_RB,     eNR_DISRES,  eNR_ORIRES, eNR_POSRES,
-  eNR_NS,     eNR_NBKERNEL_OUTER
+  eNR_NS,
 };
 #define NFORCE_INDEX asize(force_index)
 
@@ -513,7 +414,7 @@ void pr_load(FILE *log,t_commrec *cr,t_nrnb nrnb[])
   for(i=0; (i<cr->nnodes); i++) {
       add_nrnb(av,av,&(nrnb[i]));
       /* Cost due to forces */
-      for(j=0; (j<eNR_NBKERNEL_NR); j++)
+      for(j=0; (j<eNR_NBKERNEL_ALLVSALLGB); j++)
        ftot[i]+=nrnb[i].n[j]*cost_nrnb(j);
       for(j=0; (j<NFORCE_INDEX); j++) 
        ftot[i]+=nrnb[i].n[force_index[j]]*cost_nrnb(force_index[j]);
index 9f84eb75d84e85ed50c4569c57557a209ae113af..5af6f04a9e74ff33a31e85f709a456e8a4f50d47 100644 (file)
@@ -231,42 +231,53 @@ void nice_header (FILE *out,const char *fn)
   fprintf (out,"%c\n",COMMENTSIGN);
 }
 
+
 int gmx_strcasecmp_min(const char *str1, const char *str2)
 {
-  char ch1,ch2;
-  
-  do
+    char ch1,ch2;
+
+    do
     {
-      do
-       ch1=toupper(*(str1++));
-      while ((ch1=='-') || (ch1=='_'));
-      do 
-       ch2=toupper(*(str2++));
-      while ((ch2=='-') || (ch2=='_'));
-      if (ch1!=ch2) return (ch1-ch2);
+        do
+        {
+            ch1=toupper(*(str1++));
+        }
+        while ((ch1=='-') || (ch1=='_'));
+        do
+        {
+            ch2=toupper(*(str2++));
+        }
+        while ((ch2=='-') || (ch2=='_'));
+
+        if (ch1!=ch2) return (ch1-ch2);
     }
-  while (ch1);
-  return 0; 
+    while (ch1);
+    return 0;
 }
 
 int gmx_strncasecmp_min(const char *str1, const char *str2, int n)
 {
-  char ch1,ch2;
-  char *stri1, *stri2;
+    char ch1,ch2;
+    char *stri1, *stri2;
 
-  stri1=(char *)str1;
-  stri2=(char *)str2;  
-  do
+    stri1=(char *)str1;
+    stri2=(char *)str2;
+    do
     {
-      do
-       ch1=toupper(*(str1++));
-      while ((ch1=='-') || (ch1=='_'));
-      do 
-       ch2=toupper(*(str2++));
-      while ((ch2=='-') || (ch2=='_'));
-      if (ch1!=ch2) return (ch1-ch2);
+        do
+        {
+            ch1=toupper(*(str1++));
+        }
+        while ((ch1=='-') || (ch1=='_'));
+        do
+        {
+            ch2=toupper(*(str2++));
+        }
+        while ((ch2=='-') || (ch2=='_'));
+
+        if (ch1!=ch2) return (ch1-ch2);
     }
-  while (ch1 && (str1-stri1<n) && (str2-stri2<n));
+    while (ch1 && (str1-stri1<n) && (str2-stri2<n));
   return 0; 
 }
 
@@ -329,6 +340,25 @@ gmx_strndup(const char *src, int n)
     return dest;
 }
 
+/* Magic hash init number for Dan J. Bernsteins algorithm.
+ * Do NOT use any other value unless you really know what you are doing.
+ */
+const unsigned int
+gmx_string_hash_init = 5381;
+
+
+unsigned int
+gmx_string_hash_func(const char *s, unsigned int hash_init)
+{
+    int c;
+
+    while ((c = toupper(*s++)) != '\0')
+    {
+        if(isalnum(c)) hash_init = ((hash_init << 5) + hash_init) ^ c; /* (hash * 33) xor c */
+    }
+    return hash_init;
+}
+
 /*!
  * \param[in] pattern  Pattern to match against.
  * \param[in] str      String to match.
index da659df491549c0dea599db5a482c2ba2d4668fe..62857d510ec66dc597e73486df7934374a030942 100644 (file)
@@ -73,7 +73,7 @@
 static const char *tpx_tag = TPX_TAG_RELEASE;
 
 /* This number should be increased whenever the file format changes! */
-static const int tpx_version = 81;
+static const int tpx_version = 82;
 
 /* This number should only be increased when you edit the TOPOLOGY section
  * or the HEADER of the tpx format.
@@ -687,7 +687,16 @@ static void do_inputrec(t_fileio *fio, t_inputrec *ir,gmx_bool bRead,
     if (file_version >= 67) {
       gmx_fio_do_real(fio,ir->rlistlong);
     }
-    gmx_fio_do_int(fio,ir->coulombtype); 
+    if(file_version >= 82)
+    {
+        gmx_fio_do_int(fio,ir->nstcalclr);
+    }
+    else
+    {
+        /* Calculate at NS steps */
+        ir->nstcalclr = ir->nstlist;
+    }
+    gmx_fio_do_int(fio,ir->coulombtype);
     if (file_version < 32 && ir->coulombtype == eelRF)
       ir->coulombtype = eelRF_NEC;      
     if (file_version >= 81)
index c5cf5bc32a393295713cc084c80399e73541052c..945a31a4d86c734251bb4f8669ae62438371c767 100644 (file)
@@ -698,6 +698,7 @@ void pr_inputrec(FILE *fp,int indent,const char *title,t_inputrec *ir,
     PR("verlet-buffer-drift",ir->verletbuf_drift);
     PR("rlist",ir->rlist);
     PR("rlistlong",ir->rlistlong);
+    PR("nstcalclr",ir->nstcalclr);
     PR("rtpi",ir->rtpi);
     PS("coulombtype",EELTYPE(ir->coulombtype));
     PS("coulomb-modifier",INTMODIFIER(ir->coulomb_modifier));
index cf9c4a805ae3c7b3201fe98938a1f0aa393b67ed..5a8de3857ece158a1bc8ae2f319ed9b0fd825643 100644 (file)
@@ -309,8 +309,9 @@ static void upd_nbfplj(FILE *log,real *nbfp,int atnr,real f6[],real f12[],
        else
          sig  = sqrt(sigma[n]*sigma[m]);
        sig6 = pow(sig,6.0);
-       C6 (nbfp,atnr,n,m) = 4*eps*sig6; 
-       C12(nbfp,atnr,n,m) = 4*eps*sig6*sig6;
+    /* nbfp now includes the 6.0/12.0 derivative prefactors */
+       C6 (nbfp,atnr,n,m) = 4*eps*sig6/6.0;
+       C12(nbfp,atnr,n,m) = 4*eps*sig6*sig6/12.0;
       }
     }
     sfree(sigma);
@@ -677,9 +678,12 @@ void do_coupling(FILE *log,const output_env_t oenv,int nfile,
       ati = tclj->at_i;
       atj = tclj->at_j;
       if (atj == -1) 
-       atj = ati;
-      tclj->c6  =  C6(fr->nbfp,fr->ntype,ati,atj);
-      tclj->c12 = C12(fr->nbfp,fr->ntype,ati,atj);
+      {
+          atj = ati;
+      }
+      /* nbfp now includes the 6.0/12.0 derivative prefactors */
+      tclj->c6  =  C6(fr->nbfp,fr->ntype,ati,atj)/6.0;
+      tclj->c12 = C12(fr->nbfp,fr->ntype,ati,atj)/12.0;
     }
   }
   else {
@@ -707,10 +711,13 @@ void do_coupling(FILE *log,const output_env_t oenv,int nfile,
       ati = tcbu->at_i;
       atj = tcbu->at_j;
       if (atj == -1) 
-       atj = ati;
+      {
+          atj = ati;
+      }
+      /* nbfp now includes the 6.0 derivative prefactors */
       tcbu->a = BHAMA(fr->nbfp,fr->ntype,ati,atj);
       tcbu->b = BHAMB(fr->nbfp,fr->ntype,ati,atj);
-      tcbu->c = BHAMC(fr->nbfp,fr->ntype,ati,atj);
+      tcbu->c = BHAMC(fr->nbfp,fr->ntype,ati,atj)/6.0;
       if (debug)
        fprintf(debug,"buck (type=%d) = %e, %e, %e\n",
                tcbu->at_i,tcbu->a,tcbu->b,tcbu->c);
index 27d46ad6607d8e4ffa2d20f1a4b802be839debfc..fca07bf6fb675c60519642d9dd21f16e034a39c0 100644 (file)
@@ -158,8 +158,9 @@ void copy_ff(t_coupl_rec *tcr,t_forcerec *fr,t_mdatoms *md,t_idef *idef)
     atj  = tclj->at_j;
     if (atj == -1)
       atj = ati;
-    tclj->c6  = C6(fr->nbfp,fr->ntype,ati,atj);
-    tclj->c12 = C12(fr->nbfp,fr->ntype,ati,atj);
+    /* nbfp now includes the 6.0/12.0 derivative prefactors */
+    tclj->c6  = C6(fr->nbfp,fr->ntype,ati,atj)/6.0;
+    tclj->c12 = C12(fr->nbfp,fr->ntype,ati,atj)/12.0;
   }
   
   for(i=0; (i<tcr->nBU); i++) {
@@ -169,9 +170,10 @@ void copy_ff(t_coupl_rec *tcr,t_forcerec *fr,t_mdatoms *md,t_idef *idef)
     atj  = tcbu->at_j;
     if (atj == -1)
       atj = ati;
+    /* nbfp now includes the 6.0 derivative prefactor */
     tcbu->a = BHAMA(fr->nbfp,fr->ntype,ati,atj);
     tcbu->b = BHAMB(fr->nbfp,fr->ntype,ati,atj);
-    tcbu->c = BHAMC(fr->nbfp,fr->ntype,ati,atj);
+    tcbu->c = BHAMC(fr->nbfp,fr->ntype,ati,atj)/6.0;
   }
   
   for(i=0; (i<tcr->nQ); i++) {
index 59613cc238d4b3a36f7336348c94de52fd22f785..7e17b8da3203cfae775503f2fd7a9428d917bbb3 100644 (file)
@@ -202,6 +202,7 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
     gmx_bool        bAppend;
     gmx_bool        bResetCountersHalfMaxH=FALSE;
     gmx_bool        bVV,bIterations,bFirstIterate,bTemp,bPres,bTrotter;
+    gmx_bool        bUpdateDoLR;
     real        mu_aver=0,dvdl;
     int         a0,a1,gnx=0,ii;
     atom_id     *grpindex=NULL;
@@ -236,12 +237,8 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
 
     if(MASTER(cr))
     {
-        fprintf(stderr,
-                "\n* WARNING * WARNING * WARNING * WARNING * WARNING * WARNING *\n"
-                "We have just committed the new CPU detection code in this branch,\n"
-                "and will commit new SSE/AVX kernels in a few days. However, this\n"
-                "means that currently only the NxN kernels are accelerated!\n"
-                "In the mean time, you might want to avoid production runs in 4.6.\n\n");
+        gmx_warning("New C kernels (and force-only) kernels are now enabled,\n"
+                    "but it will be another couple of days for SSE/AVX kernels.\n\n");
     }
 
 #ifdef GMX_FAHCORE
@@ -522,8 +519,7 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
     /* PME tuning is only supported with GPUs or PME nodes and not with rerun */
     if ((Flags & MD_TUNEPME) &&
         EEL_PME(fr->eeltype) &&
-        fr->cutoff_scheme == ecutsVERLET &&
-        (fr->nbv->bUseGPU || !(cr->duty & DUTY_PME)) &&
+        ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) &&
         !bRerunMD)
     {
         pme_loadbal_init(&pme_loadbal,ir,state->box,fr->ic,fr->pmedata);
@@ -1106,12 +1102,19 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
         force_flags = (GMX_FORCE_STATECHANGED |
                        ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
                        GMX_FORCE_ALLFORCES |
-                       (bNStList ? GMX_FORCE_DOLR : 0) |
                        GMX_FORCE_SEPLRF |
                        (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
                        (bCalcEner ? GMX_FORCE_ENERGY : 0) |
                        (bDoFEP ? GMX_FORCE_DHDL : 0)
             );
+
+        if(fr->bTwinRange)
+        {
+            if(do_per_step(step,ir->nstcalclr))
+            {
+                force_flags |= GMX_FORCE_DO_LR;
+            }
+        }
         
         if (shellfc)
         {
@@ -1179,8 +1182,16 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                 trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ1);            
             }
 
+            /* If we are using twin-range interactions where the long-range component
+             * is only evaluated every nstcalclr>1 steps, we should do a special update
+             * step to combine the long-range forces on these steps.
+             * For nstcalclr=1 this is not done, since the forces would have been added
+             * directly to the short-range forces already.
+             */
+            bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
+            
             update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,
-                          f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
+                          f,bUpdateDoLR,fr->f_twin,fcd,
                           ekind,M,wcycle,upd,bInitStep,etrtVELOCITY1,
                           cr,nrnb,constr,&top->idef);
             
@@ -1650,9 +1661,11 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
 
                 if (bVV)
                 {
+                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
+
                     /* velocity half-step update */
                     update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
-                                  fr->bTwinRange && bNStList,fr->f_twin,fcd,
+                                  bUpdateDoLR,fr->f_twin,fcd,
                                   ekind,M,wcycle,upd,FALSE,etrtVELOCITY2,
                                   cr,nrnb,constr,&top->idef);
                 }
@@ -1666,9 +1679,10 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                 {
                     copy_rvecn(state->x,cbuf,0,state->natoms);
                 }
-                
+                bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
+
                 update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
-                              fr->bTwinRange && bNStList,fr->f_twin,fcd,
+                              bUpdateDoLR,fr->f_twin,fcd,
                               ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
                 wallcycle_stop(wcycle,ewcUPDATE);
 
@@ -1693,8 +1707,10 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                     /* now we know the scaling, we can compute the positions again again */
                     copy_rvecn(cbuf,state->x,0,state->natoms);
 
+                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
+
                     update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
-                                  fr->bTwinRange && bNStList,fr->f_twin,fcd,
+                                  bUpdateDoLR,fr->f_twin,fcd,
                                   ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
                     wallcycle_stop(wcycle,ewcUPDATE);
 
@@ -2067,13 +2083,17 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                                          fr->ic,fr->nbv,&fr->pmedata,
                                          step);
 
+                    /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */
                     fr->ewaldcoeff = fr->ic->ewaldcoeff;
+                    fr->rlist      = fr->ic->rlist;
+                    fr->rlistlong  = fr->ic->rlistlong;
+                    fr->rcoulomb   = fr->ic->rcoulomb;
+                    fr->rvdw       = fr->ic->rvdw;
                 }
-
                 cycles_pmes = 0;
             }
         }
-        
+
         if (step_rel == wcycle_get_reset_counters(wcycle) ||
             gs.set[eglsRESETCOUNTERS] != 0)
         {
index 297bbc1a0b0ac4da8c0179fe48fd900d94f58e78..07f0ecaccee38b501aca88e200d92dde9ed10cda 100644 (file)
@@ -669,23 +669,24 @@ static void checkGmxOptions(FILE* fplog, GmxOpenMMPlatformOptions *opt,
     for (i = 0; i < natoms; i++)
     {
         /* i-i */
-        c12 = C12(nbfp, natoms, i, i);
-        c6  = C6(nbfp,  natoms, i, i);
+        /* nbfp now includes the 6.0/12.0 prefactors to save flops in kernels */
+        c12 = C12(nbfp, natoms, i, i)/12.0;
+        c6  = C6(nbfp,  natoms, i, i)/6.0;
         convert_c_12_6(c12, c6, &sigma_ii, &eps_ii);
 
         for (j = 0; j < i; j++)
         {
             /* i-j */
-            c12 = C12(nbfp, natoms, i, j);
-            c6  = C6(nbfp,  natoms, i, j);
+            c12 = C12(nbfp, natoms, i, j)/12.0;
+            c6  = C6(nbfp,  natoms, i, j)/6.0;
             convert_c_12_6(c12, c6, &sigma_ij, &eps_ij);
             /* j-i */
-            c12 = C12(nbfp, natoms, j, i);
-            c6  = C6(nbfp,  natoms, j, i);
+            c12 = C12(nbfp, natoms, j, i)/12.0;
+            c6  = C6(nbfp,  natoms, j, i)/6.0;
             convert_c_12_6(c12, c6, &sigma_ji, &eps_ji);
             /* j-j */
-            c12 = C12(nbfp, natoms, j, j);
-            c6  = C6(nbfp,  natoms, j, j);
+            c12 = C12(nbfp, natoms, j, j)/12.0;
+            c6  = C6(nbfp,  natoms, j, j)/6.0;
             convert_c_12_6(c12, c6, &sigma_jj, &eps_jj);
             /* OpenMM hardcoded combination rules */
             sigma_comb = COMBRULE_SIGMA(sigma_ii, sigma_jj);
@@ -1107,8 +1108,9 @@ void* openmm_init(FILE *fplog, const char *platformOptStr,
 
         for (int i = 0; i < numAtoms; ++i)
         {
-            double c12 = nbfp[types[i]*2*ntypes+types[i]*2+1];
-            double c6 = nbfp[types[i]*2*ntypes+types[i]*2];
+            /* nbfp now includes the 6.0/12.0 derivative prefactors to save flops in kernels*/
+            double c12 = nbfp[types[i]*2*ntypes+types[i]*2+1]/12.0;
+            double c6 = nbfp[types[i]*2*ntypes+types[i]*2]/6.0;
             double sigma=0.0, epsilon=0.0;
             convert_c_12_6(c12, c6, &sigma, &epsilon);
             nonbondedForce->addParticle(charges[i], sigma, epsilon);
index 13e992f993448887485eed33bdec032ec8d6ace0..afe17d0fc70741c735b51e7fbcaec5b743a2e173 100644 (file)
 
 /* Parameters and setting for one PP-PME setup */
 typedef struct {
-    real rcut;            /* Coulomb cut-off                              */
+    real rcut_coulomb;    /* Coulomb cut-off                              */
     real rlist;           /* pair-list cut-off                            */
+    real rlistlong;       /* LR pair-list cut-off                         */
+    int  nstcalclr;       /* frequency of evaluating long-range forces for group scheme */
     real spacing;         /* (largest) PME grid spacing                   */
     ivec grid;            /* the PME grid dimensions                      */
     real grid_efficiency; /* ineffiency factor for non-uniform grids <= 1 */
@@ -83,7 +85,11 @@ struct pme_load_balancing {
     int  nstage;        /* the current maximum number of stages */
 
     real cut_spacing;   /* the minimum cutoff / PME grid spacing ratio */
-    real rbuf;          /* the pairlist buffer size */
+    real rcut_vdw;      /* Vdw cutoff (does not change) */
+    real rcut_coulomb_start; /* Initial electrostatics cutoff */
+    int  nstcalclr_start; /* Initial electrostatics cutoff */
+    real rbuf_coulomb;  /* the pairlist buffer size */
+    real rbuf_vdw;      /* the pairlist buffer size */
     matrix box_start;   /* the initial simulation box */
     int n;              /* the count of setup as well as the allocation size */
     pme_setup_t *setup; /* the PME+cutoff setups */
@@ -92,6 +98,7 @@ struct pme_load_balancing {
     int start;          /* start of setup range to consider in stage>0 */
     int end;            /* end   of setup range to consider in stage>0 */
     int elimited;       /* was the balancing limited, uses enum above */
+    int cutoff_scheme;  /* Verlet or group cut-offs */
 
     int stage;          /* the current stage */
 };
@@ -110,7 +117,32 @@ void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
     /* Any number of stages >= 2 is supported */
     pme_lb->nstage   = 2;
 
-    pme_lb->rbuf = ic->rlist - ic->rcoulomb;
+    pme_lb->cutoff_scheme = ir->cutoff_scheme;
+
+    if(pme_lb->cutoff_scheme == ecutsVERLET)
+    {
+        pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
+        pme_lb->rbuf_vdw     = pme_lb->rbuf_coulomb;
+    }
+    else
+    {
+        if(ic->rcoulomb > ic->rlist)
+        {
+            pme_lb->rbuf_coulomb = ic->rlistlong - ic->rcoulomb;
+        }
+        else
+        {
+            pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
+        }
+        if(ic->rvdw > ic->rlist)
+        {
+            pme_lb->rbuf_vdw = ic->rlistlong - ic->rvdw;
+        }
+        else
+        {
+            pme_lb->rbuf_vdw = ic->rlist - ic->rvdw;
+        }
+    }
 
     copy_mat(box,pme_lb->box_start);
     if (ir->ePBC==epbcXY && ir->nwall==2)
@@ -121,13 +153,19 @@ void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
     pme_lb->n = 1;
     snew(pme_lb->setup,pme_lb->n);
 
+    pme_lb->rcut_vdw              = ic->rvdw;
+    pme_lb->rcut_coulomb_start    = ir->rcoulomb;
+    pme_lb->nstcalclr_start       = ir->nstcalclr;
+    
     pme_lb->cur = 0;
-    pme_lb->setup[0].rcut       = ic->rcoulomb;
-    pme_lb->setup[0].rlist      = ic->rlist;
-    pme_lb->setup[0].grid[XX]   = ir->nkx;
-    pme_lb->setup[0].grid[YY]   = ir->nky;
-    pme_lb->setup[0].grid[ZZ]   = ir->nkz;
-    pme_lb->setup[0].ewaldcoeff = ic->ewaldcoeff;
+    pme_lb->setup[0].rcut_coulomb = ic->rcoulomb;
+    pme_lb->setup[0].rlist        = ic->rlist;
+    pme_lb->setup[0].rlistlong    = ic->rlistlong;
+    pme_lb->setup[0].nstcalclr    = ir->nstcalclr;
+    pme_lb->setup[0].grid[XX]     = ir->nkx;
+    pme_lb->setup[0].grid[YY]     = ir->nky;
+    pme_lb->setup[0].grid[ZZ]     = ir->nkz;
+    pme_lb->setup[0].ewaldcoeff   = ic->ewaldcoeff;
 
     pme_lb->setup[0].pmedata  = pmedata;
     
@@ -166,6 +204,7 @@ static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
 {
     pme_setup_t *set;
     real fac,sp;
+    real tmpr_coulomb,tmpr_vdw;
     int d;
 
     /* Try to add a new setup with next larger cut-off to the list */
@@ -200,9 +239,55 @@ static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
     }
     while (sp <= 1.001*pme_lb->setup[pme_lb->cur].spacing);
 
-    set->rcut    = pme_lb->cut_spacing*sp;
-    set->rlist   = set->rcut + pme_lb->rbuf;
-    set->spacing = sp;
+    set->rcut_coulomb = pme_lb->cut_spacing*sp;
+
+    if(pme_lb->cutoff_scheme == ecutsVERLET)
+    {
+        set->rlist        = set->rcut_coulomb + pme_lb->rbuf_coulomb;
+        /* We dont use LR lists with Verlet, but this avoids if-statements in further checks */
+        set->rlistlong    = set->rlist;
+    }
+    else
+    {
+        tmpr_coulomb          = set->rcut_coulomb + pme_lb->rbuf_coulomb;
+        tmpr_vdw              = pme_lb->rcut_vdw + pme_lb->rbuf_vdw;
+        set->rlist            = min(tmpr_coulomb,tmpr_vdw);
+        set->rlistlong        = max(tmpr_coulomb,tmpr_vdw);
+        
+        /* Set the long-range update frequency */
+        if(set->rlist == set->rlistlong)
+        {
+            /* No long-range interactions if the short-/long-range cutoffs are identical */
+            set->nstcalclr = 0;
+        }
+        else if(pme_lb->nstcalclr_start==0 || pme_lb->nstcalclr_start==1)
+        {
+            /* We were not doing long-range before, but now we are since rlist!=rlistlong */
+            set->nstcalclr = 1;
+        }
+        else
+        {
+            /* We were already doing long-range interactions from the start */
+            if(pme_lb->rcut_vdw > pme_lb->rcut_coulomb_start)
+            {
+                /* We were originally doing long-range VdW-only interactions.
+                 * If rvdw is still longer than rcoulomb we keep the original nstcalclr,
+                 * but if the coulomb cutoff has become longer we should update the long-range
+                 * part every step.
+                 */
+                set->nstcalclr = (tmpr_vdw > tmpr_coulomb) ? pme_lb->nstcalclr_start : 1;
+            }
+            else
+            {
+                /* We were not doing any long-range interaction from the start,
+                 * since it is not possible to do twin-range coulomb for the PME interaction.
+                 */
+                set->nstcalclr = 1;
+            }
+        }
+    }
+    
+    set->spacing      = sp;
     /* The grid efficiency is the size wrt a grid with uniform x/y/z spacing */
     set->grid_efficiency = 1;
     for(d=0; d<DIM; d++)
@@ -211,17 +296,16 @@ static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
     }
     /* The Ewald coefficient is inversly proportional to the cut-off */
     set->ewaldcoeff =
-        pme_lb->setup[0].ewaldcoeff*pme_lb->setup[0].rcut/set->rcut;
+        pme_lb->setup[0].ewaldcoeff*pme_lb->setup[0].rcut_coulomb/set->rcut_coulomb;
 
     set->count   = 0;
     set->cycles  = 0;
 
     if (debug)
     {
-        fprintf(debug,"PME loadbal: grid %d %d %d, cutoff %f\n",
-                set->grid[XX],set->grid[YY],set->grid[ZZ],set->rcut);
+        fprintf(debug,"PME loadbal: grid %d %d %d, coulomb cutoff %f\n",
+                set->grid[XX],set->grid[YY],set->grid[ZZ],set->rcut_coulomb);
     }
-
     return TRUE;
 }
 
@@ -232,7 +316,7 @@ static void print_grid(FILE *fp_err,FILE *fp_log,
                        double cycles)
 {
     char buf[STRLEN],buft[STRLEN];
-    
+
     if (cycles >= 0)
     {
         sprintf(buft,": %.1f M-cycles",cycles*1e-6);
@@ -241,9 +325,9 @@ static void print_grid(FILE *fp_err,FILE *fp_log,
     {
         buft[0] = '\0';
     }
-    sprintf(buf,"%-11s%10s pme grid %d %d %d, cutoff %.3f%s",
+    sprintf(buf,"%-11s%10s pme grid %d %d %d, coulomb cutoff %.3f%s",
             pre,
-            desc,set->grid[XX],set->grid[YY],set->grid[ZZ],set->rcut,
+            desc,set->grid[XX],set->grid[YY],set->grid[ZZ],set->rcut_coulomb,
             buft);
     if (fp_err != NULL)
     {
@@ -274,10 +358,10 @@ static void print_loadbal_limited(FILE *fp_err,FILE *fp_log,
 {
     char buf[STRLEN],sbuf[22];
 
-    sprintf(buf,"step %4s: the %s limited the PME load balancing to a cut-off of %.3f",
+    sprintf(buf,"step %4s: the %s limited the PME load balancing to a coulomb cut-off of %.3f",
             gmx_step_str(step,sbuf),
             pmelblim_str[pme_lb->elimited],
-            pme_lb->setup[pme_loadbal_end(pme_lb)-1].rcut);
+            pme_lb->setup[pme_loadbal_end(pme_lb)-1].rcut_coulomb);
     if (fp_err != NULL)
     {
         fprintf(fp_err,"\r%s\n",buf);
@@ -335,6 +419,7 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
     pme_setup_t *set;
     double cycles_fast;
     char buf[STRLEN],sbuf[22];
+    real rtab;
 
     if (pme_lb->stage == pme_lb->nstage)
     {
@@ -348,8 +433,10 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
     }
 
     set = &pme_lb->setup[pme_lb->cur];
-
     set->count++;
+
+    rtab = ir->rlistlong + ir->tabext;
+
     if (set->count % 2 == 1)
     {
         /* Skip the first cycle, because the first step after a switch
@@ -423,10 +510,10 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
                 /* Find the next setup */
                 OK = pme_loadbal_increase_cutoff(pme_lb,ir->pme_order);
             }
-                
+
             if (OK && ir->ePBC != epbcNONE)
             {
-                OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlist)
+                OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlistlong)
                       <= max_cutoff2(ir->ePBC,state->box));
                 if (!OK)
                 {
@@ -441,7 +528,7 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
                 if (DOMAINDECOMP(cr))
                 {
                     OK = change_dd_cutoff(cr,state,ir,
-                                          pme_lb->setup[pme_lb->cur].rlist);
+                                          pme_lb->setup[pme_lb->cur].rlistlong);
                     if (!OK)
                     {
                         /* Failed: do not use this setup */
@@ -506,7 +593,7 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
 
     if (DOMAINDECOMP(cr) && pme_lb->stage > 0)
     {
-        OK = change_dd_cutoff(cr,state,ir,pme_lb->setup[pme_lb->cur].rlist);
+        OK = change_dd_cutoff(cr,state,ir,pme_lb->setup[pme_lb->cur].rlistlong);
         if (!OK)
         {
             /* Failsafe solution */
@@ -527,22 +614,28 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
 
     set = &pme_lb->setup[pme_lb->cur];
 
-    ic->rcoulomb   = set->rcut;
+    ic->rcoulomb   = set->rcut_coulomb;
     ic->rlist      = set->rlist;
+    ic->rlistlong  = set->rlistlong;
+    ir->nstcalclr  = set->nstcalclr;
     ic->ewaldcoeff = set->ewaldcoeff;
 
-    if (nbv->grp[0].kernel_type == nbk8x8x8_CUDA)
+    if (pme_lb->cutoff_scheme == ecutsVERLET && nbv->grp[0].kernel_type == nbk8x8x8_CUDA)
     {
         nbnxn_cuda_pme_loadbal_update_param(nbv->cu_nbv,ic);
     }
     else
     {
-        init_interaction_const_tables(NULL,ic,nbv->grp[0].kernel_type);
+        init_interaction_const_tables(NULL,ic,ir->cutoff_scheme,
+                                      (ir->cutoff_scheme == ecutsGROUP) ? -1 : nbv->grp[0].kernel_type,
+                                      rtab);
     }
 
-    if (nbv->ngrp > 1)
+    if (pme_lb->cutoff_scheme == ecutsVERLET && nbv->ngrp > 1)
     {
-        init_interaction_const_tables(NULL,ic,nbv->grp[1].kernel_type);
+        init_interaction_const_tables(NULL,ic,ir->cutoff_scheme,
+                                      (ir->cutoff_scheme == ecutsGROUP) ? -1 : nbv->grp[1].kernel_type,
+                                      rtab);
     }
 
     if (cr->duty & DUTY_PME)
@@ -594,7 +687,7 @@ static void print_pme_loadbal_setting(FILE *fplog,
     fprintf(fplog,
             "   %-7s %6.3f nm %6.3f nm     %3d %3d %3d   %5.3f nm  %5.3f nm\n",
             name,
-            setup->rcut,setup->rlist,
+            setup->rcut_coulomb,setup->rlist,
             setup->grid[XX],setup->grid[YY],setup->grid[ZZ],
             setup->spacing,1/setup->ewaldcoeff);
 }
@@ -604,7 +697,7 @@ static void print_pme_loadbal_settings(pme_load_balancing_t pme_lb,
 {
     double pp_ratio,grid_ratio;
 
-    pp_ratio   = pow(pme_lb->setup[pme_lb->cur].rlist/pme_lb->setup[0].rlist,3.0);
+    pp_ratio   = pow(pme_lb->setup[pme_lb->cur].rlist/pme_lb->setup[0].rlistlong,3.0);
     grid_ratio = pme_grid_points(&pme_lb->setup[pme_lb->cur])/
         (double)pme_grid_points(&pme_lb->setup[0]);
 
index 6f0ac29fc08d5e13871863e0c753c94119562853..edaf0d31605cd030b9566db1f3348be099f3f932 100644 (file)
@@ -241,12 +241,6 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
 
     if (ir->cutoff_scheme == ecutsGROUP)
     {
-        if (ir->coulomb_modifier != eintmodNONE ||
-            ir->vdw_modifier != eintmodNONE)
-        {
-            warning_error(wi,"potential modifiers are not supported (yet) with the group cut-off scheme");
-        }
-
         /* BASIC CUT-OFF STUFF */
         if (ir->rlist == 0 ||
             !((EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > ir->rlist) ||
@@ -276,7 +270,16 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
             warning_error(wi,"Can not have nstlist<=0 with twin-range interactions");
         }
     }
-
+    
+    if(ir->rlistlong == ir->rlist)
+    {
+        ir->nstcalclr = 0;
+    }
+    else if(ir->rlistlong>ir->rlist && ir->nstcalclr==0)
+    {
+        warning_error(wi,"With different cutoffs for electrostatics and VdW, nstcalclr must be -1 or a positive number");
+    }
+    
     if (ir->cutoff_scheme == ecutsVERLET)
     {
         real rc_max;
@@ -370,6 +373,28 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
         ir->rlistlong = ir->rlist;
     }
 
+    if(ir->nstcalclr==-1)
+    {
+        /* if rlist=rlistlong, this will later be changed to nstcalclr=0 */
+        ir->nstcalclr = ir->nstlist;
+    }
+    else if(ir->nstcalclr>0)
+    {
+        if(ir->nstlist>0 && (ir->nstlist % ir->nstcalclr != 0))
+        {
+            warning_error(wi,"nstlist must be evenly divisible by nstcalclr. Use nstcalclr = -1 to automatically follow nstlist");
+        }
+    }
+    else if(ir->nstcalclr<-1)
+    {
+        warning_error(wi,"nstcalclr must be a positive number (divisor of nstcalclr), or -1 to follow nstlist.");
+    }
+    
+    if(EEL_PME(ir->coulombtype) && ir->rcoulomb > ir->rvdw && ir->nstcalclr>1)
+    {
+        warning_error(wi,"When used with PME, the long-range component of twin-range interactions must be updated every step (nstcalclr)");
+    }
+       
     /* GENERAL INTEGRATOR STUFF */
     if (!(ir->eI == eiMD || EI_VV(ir->eI)))
     {
@@ -878,7 +903,8 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
   /* More checks are in triple check (grompp.c) */
 
   if (ir->coulombtype == eelSWITCH) {
-    sprintf(warn_buf,"coulombtype = %s is only for testing purposes and can lead to serious artifacts, advice: use coulombtype = %s",
+    sprintf(warn_buf,"coulombtype = %s is only for testing purposes and can lead to serious "
+            "artifacts, advice: use coulombtype = %s",
            eel_names[ir->coulombtype],
            eel_names[eelRF_ZERO]);
     warning(wi,warn_buf);
@@ -890,7 +916,7 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
   }
 
   if (EEL_RF(ir->coulombtype) && ir->epsilon_rf==1 && ir->epsilon_r!=1) {
-    sprintf(warn_buf,"epsilon-r = %g and epsilon-rf = 1 with reaction field, assuming old format and exchanging epsilon-r and epsilon-rf",ir->epsilon_r);
+    sprintf(warn_buf,"epsilon-r = %g and epsilon-rf = 1 with reaction field, proceeding assuming old format and exchanging epsilon-r and epsilon-rf",ir->epsilon_r);
     warning(wi,warn_buf);
     ir->epsilon_rf = ir->epsilon_r;
     ir->epsilon_r  = 1.0;
@@ -905,9 +931,10 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
     /* reaction field (at the cut-off) */
     
     if (ir->coulombtype == eelRF_ZERO) {
-       sprintf(err_buf,"With coulombtype = %s, epsilon-rf must be 0",
+       sprintf(warn_buf,"With coulombtype = %s, epsilon-rf must be 0, assuming you meant epsilon_rf=0",
               eel_names[ir->coulombtype]);
-      CHECK(ir->epsilon_rf != 0);
+        CHECK(ir->epsilon_rf != 0);
+        ir->epsilon_rf = 0.0;
     }
 
     sprintf(err_buf,"epsilon-rf must be >= epsilon-r");
@@ -926,37 +953,47 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
   if (EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype)) {
     if (EEL_SWITCHED(ir->coulombtype)) {
       sprintf(err_buf,
-             "With coulombtype = %s rcoulomb_switch must be < rcoulomb",
+             "With coulombtype = %s rcoulomb_switch must be < rcoulomb. Or, better: Use the potential modifier options!",
              eel_names[ir->coulombtype]);
       CHECK(ir->rcoulomb_switch >= ir->rcoulomb);
     }
   } else if (ir->coulombtype == eelCUT || EEL_RF(ir->coulombtype)) {
-      if (ir->cutoff_scheme == ecutsGROUP) {
-          sprintf(err_buf,"With coulombtype = %s, rcoulomb must be >= rlist",
+      if (ir->cutoff_scheme == ecutsGROUP && ir->coulomb_modifier == eintmodNONE) {
+          sprintf(err_buf,"With coulombtype = %s, rcoulomb should be >= rlist unless you use a potential modifier",
                   eel_names[ir->coulombtype]);
           CHECK(ir->rlist > ir->rcoulomb);
       }
   }
 
-  if (EEL_FULL(ir->coulombtype)) {
-    if (ir->coulombtype==eelPMESWITCH || ir->coulombtype==eelPMEUSER ||
-        ir->coulombtype==eelPMEUSERSWITCH) {
-      sprintf(err_buf,"With coulombtype = %s, rcoulomb must be <= rlist",
-             eel_names[ir->coulombtype]);
-      CHECK(ir->rcoulomb > ir->rlist);
-    } else if (ir->cutoff_scheme == ecutsGROUP) {
-      if (ir->coulombtype == eelPME || ir->coulombtype == eelP3M_AD) {
-       sprintf(err_buf,
-               "With coulombtype = %s, rcoulomb must be equal to rlist\n"
-               "If you want optimal energy conservation or exact integration use %s",
-               eel_names[ir->coulombtype],eel_names[eelPMESWITCH]);
-      } else { 
-       sprintf(err_buf,
-               "With coulombtype = %s, rcoulomb must be equal to rlist",
-               eel_names[ir->coulombtype]);
+  if(ir->coulombtype==eelSWITCH || ir->coulombtype==eelSHIFT ||
+     ir->vdwtype==evdwSWITCH || ir->vdwtype==evdwSHIFT)
+  {
+      sprintf(warn_buf,
+              "The switch/shift interaction settings are just for compatibility; you will get better"
+              "performance from applying potential modifiers to your interactions!\n");
+      warning_note(wi,warn_buf);
+  }
+
+  if (EEL_FULL(ir->coulombtype))
+  {
+      if (ir->coulombtype==eelPMESWITCH || ir->coulombtype==eelPMEUSER ||
+          ir->coulombtype==eelPMEUSERSWITCH)
+      {
+          sprintf(err_buf,"With coulombtype = %s, rcoulomb must be <= rlist",
+                  eel_names[ir->coulombtype]);
+          CHECK(ir->rcoulomb > ir->rlist);
+      }
+      else if (ir->cutoff_scheme == ecutsGROUP && ir->coulomb_modifier == eintmodNONE)
+      {
+          if (ir->coulombtype == eelPME || ir->coulombtype == eelP3M_AD)
+          {
+              sprintf(err_buf,
+                      "With coulombtype = %s (without modifier), rcoulomb must be equal to rlist.\n"
+                      "If you want optimal energy conservation, consider using a potential modifier.",
+                      eel_names[ir->coulombtype]);
+              CHECK(ir->rcoulomb != ir->rlist);
+          }
       }
-      CHECK(ir->rcoulomb != ir->rlist);
-    }
   }
 
   if (EEL_PME(ir->coulombtype)) {
@@ -977,12 +1014,12 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
   }
 
   if (EVDW_SWITCHED(ir->vdwtype)) {
-    sprintf(err_buf,"With vdwtype = %s rvdw-switch must be < rvdw",
+    sprintf(err_buf,"With vdwtype = %s rvdw-switch must be < rvdw. Or, better - use a potential modifier.",
            evdw_names[ir->vdwtype]);
     CHECK(ir->rvdw_switch >= ir->rvdw);
   } else if (ir->vdwtype == evdwCUT) {
-      if (ir->cutoff_scheme == ecutsGROUP) {
-          sprintf(err_buf,"With vdwtype = %s, rvdw must be >= rlist",evdw_names[ir->vdwtype]);
+      if (ir->cutoff_scheme == ecutsGROUP && ir->vdw_modifier == eintmodNONE) {
+          sprintf(err_buf,"With vdwtype = %s, rvdw must be >= rlist unless you use a potential modifier",evdw_names[ir->vdwtype]);
           CHECK(ir->rlist > ir->rvdw);
       }
   }
@@ -1008,13 +1045,6 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
   }
 
   if (ir->nstlist == -1) {
-    sprintf(err_buf,
-           "nstlist=-1 only works with switched or shifted potentials,\n"
-           "suggestion: use vdw-type=%s and coulomb-type=%s",
-           evdw_names[evdwSHIFT],eel_names[eelPMESWITCH]);
-    CHECK(!(EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) &&
-            EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype)));
-
     sprintf(err_buf,"With nstlist=-1 rvdw and rcoulomb should be smaller than rlist to account for diffusion and possibly charge-group radii");
     CHECK(ir->rvdw >= ir->rlist || ir->rcoulomb >= ir->rlist);
   }
@@ -1033,13 +1063,13 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
     /* ENERGY CONSERVATION */
     if (ir_NVE(ir) && ir->cutoff_scheme == ecutsGROUP)
     {
-        if (!EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype) && ir->rvdw > 0)
+        if (!EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype) && ir->rvdw > 0 && ir->vdw_modifier == eintmodNONE)
         {
             sprintf(warn_buf,"You are using a cut-off for VdW interactions with NVE, for good energy conservation use vdwtype = %s (possibly with DispCorr)",
                     evdw_names[evdwSHIFT]);
             warning_note(wi,warn_buf);
         }
-        if (!EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > 0)
+        if (!EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > 0 && ir->coulomb_modifier == eintmodNONE)
         {
             sprintf(warn_buf,"You are using a cut-off for electrostatics with NVE, for good energy conservation use coulombtype = %s or %s",
                     eel_names[eelPMESWITCH],eel_names[eelRF_ZERO]);
@@ -1113,6 +1143,7 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
 
     if (ir->bAdress)
     {
+        warning_error(wi,"AdResS is currently disabled\n");
         if (ir->cutoff_scheme != ecutsGROUP)
         {
             warning_error(wi,"AdresS simulation supports only cutoff-scheme=group");
@@ -1585,6 +1616,7 @@ void get_ir(const char *mdparin,const char *mdparout,
   RTYPE ("rlist",      ir->rlist,      -1);
   CTYPE ("long-range cut-off for switched potentials");
   RTYPE ("rlistlong",  ir->rlistlong,  -1);
+  ITYPE ("nstcalclr",  ir->nstcalclr,  -1);
 
   /* Electrostatics */
   CCTYPE ("OPTIONS FOR ELECTROSTATICS AND VDW");
@@ -2623,7 +2655,7 @@ void do_index(const char* mdparin, const char *ndx,
 
           if (ir->etc != etcVRESCALE && ir->opts.tau_t[i] == 0)
           {
-              warning_note(wi,"tau-t = -1 is the new value to signal that a group should not have temperature coupling. Treating your use of tau-t = 0 as if you used -1.");
+              warning_note(wi,"tau-t = -1 is the value to signal that a group should not have temperature coupling. Treating your use of tau-t = 0 as if you used -1.");
           }
 
           if (ir->opts.tau_t[i] >= 0)
index e4d2403dc99873ac6187f6101d3dcc18f394daf8..d8f8a383f57a41053695bff1c9ae5f43ed3bbbfb 100644 (file)
@@ -1140,7 +1140,7 @@ static void override_nsteps_cmdline(FILE *fplog,
  * before the other nodes have read the tpx file and called gmx_detect_hardware.
  */
 typedef struct {
-    int      cutoff_scheme; /* The cutoff-scheme from inputrec_t */
+    int cutoff_scheme; /* The cutoff scheme from inputrec_t */
     gmx_bool bUseGPU;       /* Use GPU or GPU emulation          */
 } master_inf_t;
 
index 4d6fca8cb757256c9bfe4a5af5d2f6d5da2cd764..0cb55679d79db24e2a91da5e15dec35d19661426 100644 (file)
@@ -632,6 +632,7 @@ static void cmp_inputrec(FILE *fp,t_inputrec *ir1,t_inputrec *ir2,real ftol, rea
   cmp_real(fp,"inputrec->verletbuf_drift",-1,ir1->verletbuf_drift,ir2->verletbuf_drift,ftol,abstol);
   cmp_real(fp,"inputrec->rlist",-1,ir1->rlist,ir2->rlist,ftol,abstol);
   cmp_real(fp,"inputrec->rlistlong",-1,ir1->rlistlong,ir2->rlistlong,ftol,abstol);
+  cmp_int(fp,"inputrec->nstcalclr",-1,ir1->nstcalclr,ir2->nstcalclr);
   cmp_real(fp,"inputrec->rtpi",-1,ir1->rtpi,ir2->rtpi,ftol,abstol);
   cmp_int(fp,"inputrec->coulombtype",-1,ir1->coulombtype,ir2->coulombtype);
   cmp_int(fp,"inputrec->coulomb_modifier",-1,ir1->coulomb_modifier,ir2->coulomb_modifier);
index 4c48f83dad462d3fd57e7a44e67f8a16f30cc25a..b5c90324548d4a83ecb47be4a1dd04992bd84e01 100644 (file)
@@ -543,12 +543,12 @@ adress_thermo_force(int                  start,
                 {
                     if (fr->n_adress_tf_grps > 0 ){
                         /* multi component tf is on, select the right table */
-                        ATFtab = fr->atf_tabs[mdatoms->tf_table_index[iatom]].tab;
+                        ATFtab = fr->atf_tabs[mdatoms->tf_table_index[iatom]].data;
                         tabscale = fr->atf_tabs[mdatoms->tf_table_index[iatom]].scale;
                     }
                     else {
                     /* just on component*/
-                        ATFtab = fr->atf_tabs[DEFAULT_TF_TABLE].tab;
+                        ATFtab = fr->atf_tabs[DEFAULT_TF_TABLE].data;
                         tabscale = fr->atf_tabs[DEFAULT_TF_TABLE].scale;
                     }
                     
index 8eba3d7dfabc11924a4a1d6991ce85e1323d9dac..320002ef6cb2dccdecb27d13624a74073e79afc5 100644 (file)
@@ -252,7 +252,7 @@ typedef struct gmx_domdec_comm
 
     /* Cell sizes for static load balancing, first index cartesian */
     real **slb_frac;
-    
+
     /* The width of the communicated boundaries */
     real cutoff_mbody;
     real cutoff;
@@ -5009,7 +5009,7 @@ static double force_flop_count(t_nrnb *nrnb)
     const char *name;
 
     sum = 0;
-    for(i=eNR_NBKERNEL010; i<eNR_NBKERNEL_FREE_ENERGY; i++)
+    for(i=0; i<eNR_NBKERNEL_FREE_ENERGY; i++)
     {
         /* To get closer to the real timings, we half the count
          * for the normal loops and again half it for water loops.
index 3d3699bad0a01b492d94d16484055bf735abe26a..d967c87c397ce003e90ca16119ce18a71d6b523a 100644 (file)
@@ -79,9 +79,7 @@ void ns(FILE *fp,
         real       *dvdlambda,
         gmx_grppairener_t *grppener,
         gmx_bool       bFillGrid,
-        gmx_bool       bDoLongRange,
-        gmx_bool       bDoForces,
-        rvec       *f)
+        gmx_bool       bDoLongRangeNS)
 {
   char   *ptr;
   int    nsearch;
@@ -97,8 +95,7 @@ void ns(FILE *fp,
 
     nsearch = search_neighbours(fp,fr,x,box,top,groups,cr,nrnb,md,
                                 lambda,dvdlambda,grppener,
-                                bFillGrid,bDoLongRange,
-                                bDoForces,f);
+                                bFillGrid,bDoLongRangeNS);
   if (debug)
     fprintf(debug,"nsearch = %d\n",nsearch);
 
@@ -146,6 +143,7 @@ void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
                        t_grpopts  *opts,
                        rvec       x[],      history_t  *hist,
                        rvec       f[],
+                       rvec       f_longrange[],
                        gmx_enerdata_t *enerd,
                        t_fcdata   *fcd,
                        gmx_mtop_t     *mtop,
@@ -258,18 +256,25 @@ void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
     if (flags & GMX_FORCE_NONBONDED)
     {
         donb_flags = 0;
+        /* Add short-range interactions */
+        donb_flags |= GMX_NONBONDED_DO_SR;
+
         if (flags & GMX_FORCE_FORCES)
         {
-            donb_flags |= GMX_DONB_FORCES;
+            donb_flags |= GMX_NONBONDED_DO_FORCE;
+        }
+        if (flags & GMX_FORCE_ENERGY)
+        {
+            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
+        }
+        if (flags & GMX_FORCE_DO_LR)
+        {
+            donb_flags |= GMX_NONBONDED_DO_LR;
         }
 
         wallcycle_sub_start(wcycle, ewcsNONBONDED);
-        do_nonbonded(cr,fr,x,f,md,excl,
-                    fr->bBHAM ?
-                    enerd->grpp.ener[egBHAMSR] :
-                    enerd->grpp.ener[egLJSR],
-                    enerd->grpp.ener[egCOULSR],
-                    enerd->grpp.ener[egGB],box_size,nrnb,
+        do_nonbonded(cr,fr,x,f,f_longrange,md,excl,
+                    &enerd->grpp,box_size,nrnb,
                     lambda,dvdl_nb,-1,-1,donb_flags);
         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
     }
@@ -289,14 +294,10 @@ void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
                 lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
             }
             reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
-            do_nonbonded(cr,fr,x,f,md,excl,
-                         fr->bBHAM ?
-                         ed_lam.grpp.ener[egBHAMSR] :
-                         ed_lam.grpp.ener[egLJSR],
-                         ed_lam.grpp.ener[egCOULSR],
-                         enerd->grpp.ener[egGB], box_size,nrnb,
+            do_nonbonded(cr,fr,x,f,f_longrange,md,excl,
+                         &(ed_lam.grpp), box_size,nrnb,
                          lam_i,dvdl_dum,-1,-1,
-                         GMX_DONB_FOREIGNLAMBDA);
+                         GMX_NONBONDED_DO_FOREIGNLAMBDA | GMX_NONBONDED_DO_SR);
             sum_epot(&ir->opts,&ed_lam);
             enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
         }
index bf7b64eadcc85ca6537a548f1f3492fe9cab4327..a677e916d0279f0cfaba86ed21c85486c9c078f9 100644 (file)
@@ -27,7 +27,7 @@
  * 
  * To help us fund GROMACS development, we humbly ask that you cite
  * the papers on the package - you can find them in the top README file.
- * 
+ *
  * For more info, check our website at http://www.gromacs.org
  * 
  * And Hey:
@@ -102,10 +102,10 @@ static void pr_nbfp(FILE *fp,real *nbfp,gmx_bool bBHAM,int atnr)
       fprintf(fp,"%2d - %2d",i,j);
       if (bBHAM)
        fprintf(fp,"  a=%10g, b=%10g, c=%10g\n",BHAMA(nbfp,atnr,i,j),
-               BHAMB(nbfp,atnr,i,j),BHAMC(nbfp,atnr,i,j));
+               BHAMB(nbfp,atnr,i,j),BHAMC(nbfp,atnr,i,j)/6.0);
       else
-       fprintf(fp,"  c6=%10g, c12=%10g\n",C6(nbfp,atnr,i,j),
-               C12(nbfp,atnr,i,j));
+       fprintf(fp,"  c6=%10g, c12=%10g\n",C6(nbfp,atnr,i,j)/6.0,
+            C12(nbfp,atnr,i,j)/12.0);
     }
   }
 }
@@ -121,9 +121,10 @@ static real *mk_nbfp(const gmx_ffparams_t *idef,gmx_bool bBHAM)
     snew(nbfp,3*atnr*atnr);
     for(i=k=0; (i<atnr); i++) {
       for(j=0; (j<atnr); j++,k++) {
-       BHAMA(nbfp,atnr,i,j) = idef->iparams[k].bham.a;
-       BHAMB(nbfp,atnr,i,j) = idef->iparams[k].bham.b;
-       BHAMC(nbfp,atnr,i,j) = idef->iparams[k].bham.c;
+          BHAMA(nbfp,atnr,i,j) = idef->iparams[k].bham.a;
+          BHAMB(nbfp,atnr,i,j) = idef->iparams[k].bham.b;
+          /* nbfp now includes the 6.0 derivative prefactor */
+          BHAMC(nbfp,atnr,i,j) = idef->iparams[k].bham.c*6.0;
       }
     }
   }
@@ -131,11 +132,13 @@ static real *mk_nbfp(const gmx_ffparams_t *idef,gmx_bool bBHAM)
     snew(nbfp,2*atnr*atnr);
     for(i=k=0; (i<atnr); i++) {
       for(j=0; (j<atnr); j++,k++) {
-       C6(nbfp,atnr,i,j)   = idef->iparams[k].lj.c6;
-       C12(nbfp,atnr,i,j)  = idef->iparams[k].lj.c12;
+          /* nbfp now includes the 6.0/12.0 derivative prefactors */
+          C6(nbfp,atnr,i,j)   = idef->iparams[k].lj.c6*6.0;
+          C12(nbfp,atnr,i,j)  = idef->iparams[k].lj.c12*12.0;
       }
     }
   }
+
   return nbfp;
 }
 
@@ -357,10 +360,9 @@ check_solvent_cg(const gmx_moltype_t   *molt,
         /* So, is it an SPC?
          * For this we require thatn all atoms have charge, 
          * the charges on atom 2 & 3 should be the same, and only
-         * atom 1 should have VdW.
+         * atom 1 might have VdW.
          */
-        if (has_vdw[0] == TRUE && 
-            has_vdw[1] == FALSE &&
+        if (has_vdw[1] == FALSE &&
             has_vdw[2] == FALSE &&
             tmp_charge[0]  != 0 &&
             tmp_charge[1]  != 0 &&
@@ -383,10 +385,9 @@ check_solvent_cg(const gmx_moltype_t   *molt,
     {
         /* Or could it be a TIP4P?
          * For this we require thatn atoms 2,3,4 have charge, but not atom 1. 
-         * Only atom 1 should have VdW.
+         * Only atom 1 mght have VdW.
          */
-        if(has_vdw[0] == TRUE && 
-           has_vdw[1] == FALSE &&
+        if(has_vdw[1] == FALSE &&
            has_vdw[2] == FALSE &&
            has_vdw[3] == FALSE &&
            tmp_charge[0]  == 0 &&
@@ -922,10 +923,12 @@ void set_avcsixtwelve(FILE *fplog,t_forcerec *fr,const gmx_mtop_t *mtop)
                         npair_ij = tmpi*(tmpi - 1)/2;
                     }
                     if (bBHAM) {
-                        csix    += npair_ij*BHAMC(nbfp,ntp,tpi,tpj);
+                        /* nbfp now includes the 6.0 derivative prefactor */
+                        csix    += npair_ij*BHAMC(nbfp,ntp,tpi,tpj)/6.0;
                     } else {
-                        csix    += npair_ij*   C6(nbfp,ntp,tpi,tpj);
-                        ctwelve += npair_ij*  C12(nbfp,ntp,tpi,tpj);
+                        /* nbfp now includes the 6.0/12.0 derivative prefactors */
+                        csix    += npair_ij*   C6(nbfp,ntp,tpi,tpj)/6.0;
+                        ctwelve += npair_ij*  C12(nbfp,ntp,tpi,tpj)/12.0;
                     }
                     npair += npair_ij;
                 }
@@ -965,10 +968,12 @@ void set_avcsixtwelve(FILE *fplog,t_forcerec *fr,const gmx_mtop_t *mtop)
                                 tpj = atoms->atom[k].typeB;
                             }
                             if (bBHAM) {
-                               csix -= nmol*BHAMC(nbfp,ntp,tpi,tpj);
+                                /* nbfp now includes the 6.0 derivative prefactor */
+                               csix -= nmol*BHAMC(nbfp,ntp,tpi,tpj)/6.0;
                             } else {
-                                csix    -= nmol*C6 (nbfp,ntp,tpi,tpj);
-                                ctwelve -= nmol*C12(nbfp,ntp,tpi,tpj);
+                                /* nbfp now includes the 6.0/12.0 derivative prefactors */
+                                csix    -= nmol*C6 (nbfp,ntp,tpi,tpj)/6.0;
+                                ctwelve -= nmol*C12(nbfp,ntp,tpi,tpj)/12.0;
                             }
                             nexcl += nmol;
                         }
@@ -1020,12 +1025,14 @@ void set_avcsixtwelve(FILE *fplog,t_forcerec *fr,const gmx_mtop_t *mtop)
                         }
                         if (bBHAM)
                         {
-                            csix    += nmolc*BHAMC(nbfp,ntp,tpi,tpj);
+                            /* nbfp now includes the 6.0 derivative prefactor */
+                            csix    += nmolc*BHAMC(nbfp,ntp,tpi,tpj)/6.0;
                         }
                         else
                         {
-                            csix    += nmolc*C6 (nbfp,ntp,tpi,tpj);
-                            ctwelve += nmolc*C12(nbfp,ntp,tpi,tpj);
+                            /* nbfp now includes the 6.0/12.0 derivative prefactors */
+                            csix    += nmolc*C6 (nbfp,ntp,tpi,tpj)/6.0;
+                            ctwelve += nmolc*C12(nbfp,ntp,tpi,tpj)/12.0;
                         }
                         npair += nmolc;
                     }
@@ -1121,42 +1128,61 @@ static void set_bham_b_max(FILE *fplog,t_forcerec *fr,
 
 static void make_nbf_tables(FILE *fp,const output_env_t oenv,
                             t_forcerec *fr,real rtab,
-                           const t_commrec *cr,
-                           const char *tabfn,char *eg1,char *eg2,
-                           t_nblists *nbl)
+                            const t_commrec *cr,
+                            const char *tabfn,char *eg1,char *eg2,
+                            t_nblists *nbl)
 {
-  char buf[STRLEN];
-  int i,j;
+    char buf[STRLEN];
+    int i,j;
 
-  if (tabfn == NULL) {
-    if (debug)
-      fprintf(debug,"No table file name passed, can not read table, can not do non-bonded interactions\n");
-    return;
-  }
-    
-  sprintf(buf,"%s",tabfn);
-  if (eg1 && eg2)
+    if (tabfn == NULL) {
+        if (debug)
+            fprintf(debug,"No table file name passed, can not read table, can not do non-bonded interactions\n");
+        return;
+    }
+
+    sprintf(buf,"%s",tabfn);
+    if (eg1 && eg2)
     /* Append the two energy group names */
-    sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1,"_%s_%s.%s",
-           eg1,eg2,ftp2ext(efXVG));
-  nbl->tab = make_tables(fp,oenv,fr,MASTER(cr),buf,rtab,0);
-  /* Copy the contents of the table to separate coulomb and LJ tables too,
-   * to improve cache performance.
-   */
-
-  /* For performance reasons we want
-   * the table data to be aligned to 16-byte. The pointer could be freed
-   * but currently isn't.
-   */
-  snew_aligned(nbl->vdwtab,8*(nbl->tab.n+1),16);
-  snew_aligned(nbl->coultab,4*(nbl->tab.n+1),16);
-  
-  for(i=0; i<=nbl->tab.n; i++) {
-    for(j=0; j<4; j++)
-      nbl->coultab[4*i+j] = nbl->tab.tab[12*i+j];
-    for(j=0; j<8; j++)
-      nbl->vdwtab [8*i+j] = nbl->tab.tab[12*i+4+j];
-  }
+        sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1,"_%s_%s.%s",
+                eg1,eg2,ftp2ext(efXVG));
+    nbl->table_elec_vdw = make_tables(fp,oenv,fr,MASTER(cr),buf,rtab,0);
+    /* Copy the contents of the table to separate coulomb and LJ tables too,
+     * to improve cache performance.
+     */
+    /* For performance reasons we want
+     * the table data to be aligned to 16-byte. The pointers could be freed
+     * but currently aren't.
+     */
+    nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
+    nbl->table_elec.format = nbl->table_elec_vdw.format;
+    nbl->table_elec.r = nbl->table_elec_vdw.r;
+    nbl->table_elec.n = nbl->table_elec_vdw.n;
+    nbl->table_elec.scale = nbl->table_elec_vdw.scale;
+    nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
+    nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
+    nbl->table_elec.ninteractions = 1;
+    nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
+    snew_aligned(nbl->table_elec.data,nbl->table_elec.stride*(nbl->table_elec.n+1),16);
+
+    nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
+    nbl->table_vdw.format = nbl->table_elec_vdw.format;
+    nbl->table_vdw.r = nbl->table_elec_vdw.r;
+    nbl->table_vdw.n = nbl->table_elec_vdw.n;
+    nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
+    nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
+    nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
+    nbl->table_vdw.ninteractions = 2;
+    nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
+    snew_aligned(nbl->table_vdw.data,nbl->table_vdw.stride*(nbl->table_vdw.n+1),16);
+
+    for(i=0; i<=nbl->table_elec_vdw.n; i++)
+    {
+        for(j=0; j<4; j++)
+            nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
+        for(j=0; j<8; j++)
+            nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
+    }
 }
 
 static void count_tables(int ftype1,int ftype2,const gmx_mtop_t *mtop,
@@ -1494,73 +1520,54 @@ static void pick_nbnxn_kernel(FILE *fp,
 }
 
 
-static void init_verlet_ewald_f_table(interaction_const_t *ic,
-                                      int verlet_kernel_type)
+static void init_ewald_f_table(interaction_const_t *ic,
+                               int cutoff_scheme,
+                               int  verlet_kernel_type,
+                               real rtab)
 {
-    if (nbnxn_kernel_pairlist_simple(verlet_kernel_type))
+    real maxr;
+
+    if (cutoff_scheme==ecutsGROUP || nbnxn_kernel_pairlist_simple(verlet_kernel_type))
     {
         /* With a spacing of 0.0005 we are at the force summation accuracy
          * for the SSE kernels for "normal" atomistic simulations.
          */
         ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff,
                                                    ic->rcoulomb);
-        ic->tabq_size  = (int)(ic->rcoulomb*ic->tabq_scale) + 2;
-#ifndef GMX_DOUBLE
-        ic->tabq_format = tableformatFDV0;
-#else
-        ic->tabq_format = tableformatF;
-#endif
+        
+        maxr = (rtab>ic->rcoulomb) ? rtab : ic->rcoulomb;
+        ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
     }
     else
     {
         ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
         /* Subtract 2 iso 1 to avoid access out of range due to rounding */
         ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
-        if (verlet_kernel_type == nbk8x8x8_CUDA)
-        {
-            /* This case is handled in the nbnxn CUDA module */
-            ic->tabq_format = tableformatNONE;
-        }
-        else
-        {
-            ic->tabq_format = tableformatF;
-        }
-    }
-
-    switch (ic->tabq_format)
-    {
-    case tableformatNONE:
-        break;
-    case tableformatF:
-        sfree_aligned(ic->tabq_coul_F);
-        sfree_aligned(ic->tabq_coul_V);
-        snew_aligned(ic->tabq_coul_F,ic->tabq_size,16);
-        snew_aligned(ic->tabq_coul_V,ic->tabq_size,16);
-        table_spline3_fill_ewald_lr(ic->tabq_coul_F,ic->tabq_coul_V,
-                                    ic->tabq_size,ic->tabq_format,
-                                    1/ic->tabq_scale,ic->ewaldcoeff);
-        break;
-    case tableformatFDV0:
-        sfree_aligned(ic->tabq_coul_F);
-        snew_aligned(ic->tabq_coul_FDV0,ic->tabq_size*4,16);
-        table_spline3_fill_ewald_lr(ic->tabq_coul_FDV0,NULL,
-                                    ic->tabq_size,ic->tabq_format,
-                                    1/ic->tabq_scale,ic->ewaldcoeff);
-        break;
-    default:
-        gmx_incons("Unknown table format");
     }
+
+    sfree_aligned(ic->tabq_coul_FDV0);
+    sfree_aligned(ic->tabq_coul_F);
+    sfree_aligned(ic->tabq_coul_V);
+
+    /* Create the original table data in FDV0 */
+    snew_aligned(ic->tabq_coul_FDV0,ic->tabq_size*4,16);
+    snew_aligned(ic->tabq_coul_F,ic->tabq_size,16);
+    snew_aligned(ic->tabq_coul_V,ic->tabq_size,16);
+    table_spline3_fill_ewald_lr(ic->tabq_coul_F,ic->tabq_coul_V,ic->tabq_coul_FDV0,
+                                ic->tabq_size,1/ic->tabq_scale,ic->ewaldcoeff);
 }
 
 void init_interaction_const_tables(FILE *fp, 
                                    interaction_const_t *ic,
-                                   int verlet_kernel_type)
+                                   int cutoff_scheme,
+                                   int verlet_kernel_type,
+                                   real rtab)
 {
     real spacing;
 
     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
     {
-        init_verlet_ewald_f_table(ic,verlet_kernel_type);
+        init_ewald_f_table(ic,cutoff_scheme,verlet_kernel_type,rtab);
 
         if (fp != NULL)
         {
@@ -1572,17 +1579,24 @@ void init_interaction_const_tables(FILE *fp,
 
 void init_interaction_const(FILE *fp, 
                             interaction_const_t **interaction_const,
-                            const t_forcerec *fr)
+                            const t_forcerec *fr,
+                            real  rtab)
 {
     interaction_const_t *ic;
 
     snew(ic, 1);
 
-    ic->rlist       = fr->rlist;
+    /* Just allocate something so we can free it */
+    snew_aligned(ic->tabq_coul_FDV0,16,16);
+    snew_aligned(ic->tabq_coul_F,16,16);
+    snew_aligned(ic->tabq_coul_V,16,16);
 
+    ic->rlist       = fr->rlist;
+    ic->rlistlong   = fr->rlistlong;
+    
     /* Lennard-Jones */
     ic->rvdw        = fr->rvdw;
-    if (fr->vdw_pot_shift)
+    if (fr->vdw_modifier==eintmodPOTSHIFT)
     {
         ic->sh_invrc6 = pow(ic->rvdw,-6.0);
     }
@@ -1599,7 +1613,7 @@ void init_interaction_const(FILE *fp,
 
     /* Ewald */
     ic->ewaldcoeff  = fr->ewaldcoeff;
-    if (fr->coul_pot_shift)
+    if (fr->coulomb_modifier==eintmodPOTSHIFT)
     {
         ic->sh_ewald = gmx_erfc(ic->ewaldcoeff*ic->rcoulomb);
     }
@@ -1620,7 +1634,7 @@ void init_interaction_const(FILE *fp,
         /* For plain cut-off we might use the reaction-field kernels */
         ic->epsilon_rf = ic->epsilon_r;
         ic->k_rf       = 0;
-        if (fr->coul_pot_shift)
+        if (fr->coulomb_modifier==eintmodPOTSHIFT)
         {
             ic->c_rf   = 1/ic->rcoulomb;
         }
@@ -1652,10 +1666,14 @@ void init_interaction_const(FILE *fp,
         nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv);
     }
 
-    if (fr->cutoff_scheme == ecutsVERLET)
+    if(fr->cutoff_scheme == ecutsGROUP)
+    {
+        init_interaction_const_tables(fp,ic,ecutsGROUP,-1,rtab);
+    }
+    else
     {
         assert(fr->nbv != NULL && fr->nbv->grp != NULL);
-        init_interaction_const_tables(fp,ic,fr->nbv->grp[fr->nbv->ngrp-1].kernel_type);
+        init_interaction_const_tables(fp,ic,ecutsVERLET,fr->nbv->grp[fr->nbv->ngrp-1].kernel_type,rtab);
     }
 }
 
@@ -1930,15 +1948,25 @@ void init_forcerec(FILE *fp,
     }
 
     bGenericKernelOnly = FALSE;
+
+    /* We now check in the NS code whether a particular combination of interactions
+     * can be used with water optimization, and disable it if that is not the case.
+     */
+
     if (getenv("GMX_NB_GENERIC") != NULL)
     {
         if (fp != NULL)
         {
             fprintf(fp,
                     "Found environment variable GMX_NB_GENERIC.\n"
-                    "Disabling interaction-specific nonbonded kernels.\n\n");
+                    "Disabling all interaction-specific nonbonded kernels, will only\n"
+                    "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
         }
         bGenericKernelOnly = TRUE;
+    }
+
+    if (bGenericKernelOnly==TRUE)
+    {
         bNoSolvOpt         = TRUE;
     }
 
@@ -1953,6 +1981,8 @@ void init_forcerec(FILE *fp,
         }
     }
 
+    fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
+
     /* Check if we can/should do all-vs-all kernels */
     fr->bAllvsAll       = can_use_allvsall(ir,mtop,FALSE,NULL,NULL);
     fr->AllvsAll_work   = NULL;
@@ -2001,6 +2031,7 @@ void init_forcerec(FILE *fp,
             fr->bMolPBC = dd_bonded_molpbc(cr->dd,fr->ePBC);
         }
     }
+
     fr->rc_scaling = ir->refcoord_scaling;
     copy_rvec(ir->posres_com,fr->posres_com);
     copy_rvec(ir->posres_comB,fr->posres_comB);
@@ -2009,9 +2040,77 @@ void init_forcerec(FILE *fp,
     fr->eeltype    = ir->coulombtype;
     fr->vdwtype    = ir->vdwtype;
 
-    fr->coul_pot_shift = (ir->coulomb_modifier == eintmodPOTSHIFT);
-    fr->vdw_pot_shift  = (ir->vdw_modifier     == eintmodPOTSHIFT);
-    
+    fr->coulomb_modifier = ir->coulomb_modifier;
+    fr->vdw_modifier     = ir->vdw_modifier;
+
+    /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
+    switch(fr->eeltype)
+    {
+        case eelCUT:
+            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_COULOMB;
+            break;
+
+        case eelRF:
+        case eelGRF:
+        case eelRF_NEC:
+            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
+            break;
+
+        case eelRF_ZERO:
+            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
+            fr->coulomb_modifier          = eintmodEXACTCUTOFF;
+            break;
+
+        case eelSWITCH:
+        case eelSHIFT:
+        case eelUSER:
+        case eelENCADSHIFT:
+        case eelPMESWITCH:
+        case eelPMEUSER:
+        case eelPMEUSERSWITCH:
+            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
+            break;
+
+        case eelPME:
+        case eelEWALD:
+            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
+            break;
+
+        default:
+            gmx_fatal(FARGS,"Unsupported electrostatic interaction: %s",eel_names[fr->eeltype]);
+            break;
+    }
+
+    /* Vdw: Translate from mdp settings to kernel format */
+    switch(fr->vdwtype)
+    {
+        case evdwCUT:
+            if(fr->bBHAM)
+            {
+                fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
+            }
+            else
+            {
+                fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
+            }
+            break;
+
+        case evdwSWITCH:
+        case evdwSHIFT:
+        case evdwUSER:
+        case evdwENCADSHIFT:
+            fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
+            break;
+
+        default:
+            gmx_fatal(FARGS,"Unsupported vdw interaction: %s",evdw_names[fr->vdwtype]);
+            break;
+    }
+
+    /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
+    fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
+    fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
+
     fr->bTwinRange = fr->rlistlong > fr->rlist;
     fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype==eelEWALD);
     
@@ -2021,8 +2120,33 @@ void init_forcerec(FILE *fp,
     {
         fr->bvdwtab    = (fr->vdwtype != evdwCUT ||
                           !gmx_within_tol(fr->reppow,12.0,10*GMX_DOUBLE_EPS));
-        fr->bcoultab   = (!(fr->eeltype == eelCUT || EEL_RF(fr->eeltype)) ||
-                          fr->eeltype == eelRF_ZERO);
+        /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
+        fr->bcoultab   = !(fr->eeltype == eelCUT ||
+                           fr->eeltype == eelEWALD ||
+                           fr->eeltype == eelPME ||
+                           fr->eeltype == eelRF ||
+                           fr->eeltype == eelRF_ZERO);
+
+        /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
+         * going to be faster to tabulate the interaction than calling the generic kernel.
+         */
+        if(fr->nbkernel_elec_modifier==eintmodPOTSWITCH && fr->nbkernel_vdw_modifier==eintmodPOTSWITCH)
+        {
+            if((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
+            {
+                fr->bcoultab = TRUE;
+            }
+        }
+        else if((fr->nbkernel_elec_modifier==eintmodPOTSHIFT && fr->nbkernel_vdw_modifier==eintmodPOTSHIFT) ||
+                ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
+                  fr->nbkernel_elec_modifier==eintmodEXACTCUTOFF &&
+                  (fr->nbkernel_vdw_modifier==eintmodPOTSWITCH || fr->nbkernel_vdw_modifier==eintmodPOTSHIFT))))
+        {
+            if(fr->rcoulomb != fr->rvdw)
+            {
+                fr->bcoultab = TRUE;
+            }
+        }
 
         if (getenv("GMX_REQUIRE_TABLES"))
         {
@@ -2035,6 +2159,17 @@ void init_forcerec(FILE *fp,
             fprintf(fp,"Table routines are used for coulomb: %s\n",bool_names[fr->bcoultab]);
             fprintf(fp,"Table routines are used for vdw:     %s\n",bool_names[fr->bvdwtab ]);
         }
+
+        if(fr->bvdwtab==TRUE)
+        {
+            fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
+            fr->nbkernel_vdw_modifier    = eintmodNONE;
+        }
+        if(fr->bcoultab==TRUE)
+        {
+            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
+            fr->nbkernel_elec_modifier    = eintmodNONE;
+        }
     }
 
     if (ir->cutoff_scheme == ecutsVERLET)
@@ -2126,7 +2261,6 @@ void init_forcerec(FILE *fp,
     
     if (fr->nbfp == NULL) {
         fr->ntype = mtop->ffparams.atnr;
-        fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
         fr->nbfp  = mk_nbfp(&mtop->ffparams,fr->bBHAM);
     }
     
@@ -2236,10 +2370,10 @@ void init_forcerec(FILE *fp,
      * A little unnecessary to make both vdw and coul tables sometimes,
      * but what the heck... */
     
-    bTab = fr->bcoultab || fr->bvdwtab;
+    bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald;
 
     bSep14tab = ((!bTab || fr->eeltype!=eelCUT || fr->vdwtype!=evdwCUT ||
-                  fr->bBHAM) &&
+                  fr->bBHAM || fr->bEwald) &&
                  (gmx_mtop_ftype_count(mtop,F_LJ14) > 0 ||
                   gmx_mtop_ftype_count(mtop,F_LJC14_Q) > 0 ||
                   gmx_mtop_ftype_count(mtop,F_LJC_PAIRS_NB) > 0));
@@ -2284,7 +2418,7 @@ void init_forcerec(FILE *fp,
         if (bNormalnblists) {
             make_nbf_tables(fp,oenv,fr,rtab,cr,tabfn,NULL,NULL,&fr->nblists[0]);
             if (!bSep14tab)
-                fr->tab14 = fr->nblists[0].tab;
+                fr->tab14 = fr->nblists[0].table_elec_vdw;
             m = 1;
         } else {
             m = 0;
@@ -2395,14 +2529,16 @@ void init_forcerec(FILE *fp,
     
     /* Initialize neighbor search */
     init_ns(fp,cr,&fr->ns,fr,mtop,box);
-    
+
     if (cr->duty & DUTY_PP)
     {
-        gmx_setup_kernels(fp,fr,bGenericKernelOnly);
-        if (ir->bAdress)
+        gmx_nonbonded_setup(fp,fr,bGenericKernelOnly);
+    /*
+     if (ir->bAdress)
         {
             gmx_setup_adress_kernels(fp,bGenericKernelOnly);
         }
+     */
     }
 
     /* Initialize the thread working data for bonded interactions */
@@ -2418,13 +2554,10 @@ void init_forcerec(FILE *fp,
         }
 
         init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
-
-        /* initialize interaction constants
-         * TODO should be moved out during modularization.
-         */
-        init_interaction_const(fp, &fr->ic, fr);
     }
 
+    /* fr->ic is used both by verlet and group kernels (to some extent) now */
+    init_interaction_const(fp, &fr->ic, fr, rtab);
     if (ir->eDispCorr != edispcNO)
     {
         calc_enervirdiff(fp,ir->eDispCorr,fr);
@@ -2447,7 +2580,7 @@ void pr_forcerec(FILE *fp,t_forcerec *fr,t_commrec *cr)
   /*pr_int(fp,fr->cg0);
     pr_int(fp,fr->hcg);*/
   for(i=0; i<fr->nnblists; i++)
-    pr_int(fp,fr->nblists[i].tab.n);
+    pr_int(fp,fr->nblists[i].table_elec_vdw.n);
   pr_real(fp,fr->rcoulomb_switch);
   pr_real(fp,fr->rcoulomb);
   
index 0cd82072ef3e517dca49d57e02e9353888ec5234..4a0905cbe3e355f24e9186c616c06993bb2d5540 100644 (file)
@@ -1120,7 +1120,8 @@ int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir,gmx_localtop_t *to
 #else
             genborn_allvsall_calc_still_radii(fr,md,born,top,x[0],cr,&fr->AllvsAll_workgb);
 #endif
-            inc_nrnb(nrnb,eNR_BORN_AVA_RADII_STILL,cnt);
+            /* 13 flops in outer loop, 47 flops in inner loop */
+            inc_nrnb(nrnb,eNR_BORN_AVA_RADII_STILL,md->homenr*13+cnt*47);
         }
         else if(ir->gb_algorithm==egbHCT || ir->gb_algorithm==egbOBC)
         {
@@ -1140,14 +1141,13 @@ int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir,gmx_localtop_t *to
 #else
             genborn_allvsall_calc_hct_obc_radii(fr,md,born,ir->gb_algorithm,top,x[0],cr,&fr->AllvsAll_workgb);
 #endif
-            inc_nrnb(nrnb,eNR_BORN_AVA_RADII_HCT_OBC,cnt);
+            /* 24 flops in outer loop, 183 in inner */
+            inc_nrnb(nrnb,eNR_BORN_AVA_RADII_HCT_OBC,md->homenr*24+cnt*183);
         }
         else
         {
             gmx_fatal(FARGS,"Bad gb algorithm for all-vs-all interactions");
         }
-        inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,md->homenr);
-
         return 0;
     }
     
@@ -1279,17 +1279,18 @@ int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir,gmx_localtop_t *to
         switch(ir->gb_algorithm)
         {
             case egbSTILL:
-                inc_nrnb(nrnb,eNR_BORN_RADII_STILL,nl->nrj);
+                /* 17 flops per outer loop iteration, 47 flops per inner loop */
+                inc_nrnb(nrnb,eNR_BORN_RADII_STILL,nl->nri*17+nl->nrj*47);
                 break;
             case egbHCT:
             case egbOBC:
-                inc_nrnb(nrnb,eNR_BORN_RADII_HCT_OBC,nl->nrj);
+                /* 61 (assuming 10 for tanh) flops for outer loop iteration, 183 flops per inner loop */
+                inc_nrnb(nrnb,eNR_BORN_RADII_HCT_OBC,nl->nri*61+nl->nrj*183);
                 break;
                 
             default:
                 break;
         }
-        inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,nl->nri);
     }
     
     return 0;        
@@ -1643,7 +1644,7 @@ calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t
   
   /* Calculate the bonded GB-interactions using either table or analytical formula */
     enerd->term[F_GBPOL]       += gb_bonds_tab(x,f,fr->fshift, md->chargeA,&(fr->gbtabscale),
-                                     fr->invsqrta,fr->dvda,fr->gbtab.tab,idef,born->epsilon_r,born->gb_epsilon_solvent, fr->epsfac, pbc_null, graph);
+                                     fr->invsqrta,fr->dvda,fr->gbtab.data,idef,born->epsilon_r,born->gb_epsilon_solvent, fr->epsfac, pbc_null, graph);
     
     /* Calculate self corrections to the GB energies - currently only A state used! (FIXME) */
     enerd->term[F_GBPOL]       += calc_gb_selfcorrections(cr,born->nr,md->chargeA, born, fr->dvda, md, fr->epsfac);         
@@ -1678,8 +1679,8 @@ calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t
         genborn_allvsall_calc_chainrule(fr,md,born,x[0],f[0],gb_algorithm,fr->AllvsAll_workgb);
 #endif
         cnt = md->homenr*(md->nr/2+1);
-        inc_nrnb(nrnb,eNR_BORN_AVA_CHAINRULE,cnt);
-        inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,md->homenr);
+        /* 9 flops for outer loop, 15 for inner */
+        inc_nrnb(nrnb,eNR_BORN_AVA_CHAINRULE,md->homenr*9+cnt*15);
         return;
     }
     
@@ -1706,9 +1707,8 @@ calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t
 
     if(!fr->bAllvsAll)
     {
-        inc_nrnb(nrnb,eNR_BORN_CHAINRULE,fr->gblist.nrj);
-        inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,fr->gblist.nri);
-
+        /* 9 flops for outer loop, 15 for inner */
+        inc_nrnb(nrnb,eNR_BORN_CHAINRULE,fr->gblist.nri*9+fr->gblist.nrj*15);
     }
 }
 
index d14f434eedeb83f59610d324d04b818a5a71678f..5c6aec2a7970ed55860f5268dab5bc27ca9ac428 100644 (file)
@@ -704,7 +704,7 @@ static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
              ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
              GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
-             (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
+             (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
 
     /* Clear the unused shake virial and pressure */
     clear_mat(shake_vir);
index 0441011087ea345e686a047b20c9093fad2146ff..fb3614274ca99f3921a390d054cc80bedb71c193 100644 (file)
@@ -506,14 +506,16 @@ void nbnxn_atomdata_init(FILE *fp,
         {
             if (i < ntype && j < ntype)
             {
-                /* We store the prefactor in the derivative of the potential
-                 * in the parameter to avoid multiplications in the inner loop.
+                /* fr->nbfp has been updated, so that array too now stores c6/c12 including
+                 * the 6.0/12.0 prefactors to save 2 flops in the most common case (force-only).
                  */
                 c6  = nbfp[(i*ntype+j)*2  ];
                 c12 = nbfp[(i*ntype+j)*2+1];
-                nbat->nbfp[(i*nbat->ntype+j)*2  ] =  6.0*c6;
-                nbat->nbfp[(i*nbat->ntype+j)*2+1] = 12.0*c12;
-
+                nbat->nbfp[(i*nbat->ntype+j)*2  ] = c6;
+                nbat->nbfp[(i*nbat->ntype+j)*2+1] = c12;
+                c6  /= 6.0;
+                c12 /= 12.0;
+                
                 bCombGeom = bCombGeom &&
                     gmx_within_tol(c6*c6  ,nbfp[(i*ntype+i)*2  ]*nbfp[(j*ntype+j)*2  ],tol) &&
                     gmx_within_tol(c12*c12,nbfp[(i*ntype+i)*2+1]*nbfp[(j*ntype+j)*2+1],tol);
index a97dd18d82026b3ee3c831dbe98af081503b9505..a323b95c3e7ffab9ebe10e3d1fbc9346b5d2958f 100644 (file)
@@ -89,7 +89,7 @@ static void init_ewald_coulomb_force_table(cu_nbparam_t *nbp)
 
     pmalloc((void**)&ftmp, tabsize*sizeof(*ftmp));
 
-    table_spline3_fill_ewald_lr(ftmp, NULL, tabsize, tableformatF,
+    table_spline3_fill_ewald_lr(ftmp, NULL, NULL, tabsize,
                                 1/tabscale, nbp->ewald_beta);
 
     /* If the table pointer == NULL the table is generated the first time =>
index 571e7047c78ebb574d99daf8e671a13628e9bc91..10dc410b49b96e96a1218f37193b54451b414f3d 100644 (file)
@@ -17,7 +17,7 @@
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version 2
  * of the License, or (at your option) any later version.
- * 
+ *
  * If you want to redistribute modifications, please consider that
  * scientific software is very special. Version control is crucial -
  * bugs must be traceable. We will be happy to consider code for
@@ -94,11 +94,11 @@ static void reallocate_nblist(t_nblist *nl)
 {
     if (gmx_debug_at)
     {
-        fprintf(debug,"reallocating neigborlist il_code=%d, maxnri=%d\n",
-                nl->il_code,nl->maxnri); 
+        fprintf(debug,"reallocating neigborlist (ielec=%d, ivdw=%d, igeometry=%d, free_energy=%d), maxnri=%d\n",
+                nl->ielec,nl->ivdw,nl->igeometry,nl->free_energy,nl->maxnri);
     }
     srenew(nl->iinr,   nl->maxnri);
-    if (nl->enlist == enlistCG_CG)
+    if (nl->igeometry == GMX_NBLIST_GEOMETRY_CG_CG)
     {
         srenew(nl->iinr_end,nl->maxnri);
     }
@@ -107,60 +107,17 @@ static void reallocate_nblist(t_nblist *nl)
     srenew(nl->jindex, nl->maxnri+1);
 }
 
-/* ivdw/icoul are used to determine the type of interaction, so we
- * can set an innerloop index here. The obvious choice for this would have
- * been the vdwtype/coultype values in the forcerecord, but unfortunately 
- * those types are braindead - for instance both Buckingham and normal 
- * Lennard-Jones use the same value (evdwCUT), and a separate gmx_boolean variable
- * to determine which interaction is used. There is further no special value
- * for 'no interaction'. For backward compatibility with old TPR files we won't
- * change this in the 3.x series, so when calling this routine you should use:
- *
- * icoul=0 no coulomb interaction
- * icoul=1 cutoff standard coulomb
- * icoul=2 reaction-field coulomb
- * icoul=3 tabulated coulomb
- *
- * ivdw=0 no vdw interaction
- * ivdw=1 standard L-J interaction
- * ivdw=2 Buckingham
- * ivdw=3 tabulated vdw.
- *
- * Kind of ugly, but it works.
- */
-static void init_nblist(t_nblist *nl_sr,t_nblist *nl_lr,
+
+static void init_nblist(FILE *log, t_nblist *nl_sr,t_nblist *nl_lr,
                         int maxsr,int maxlr,
-                        int ivdw, int icoul, 
-                        gmx_bool bfree, int enlist)
+                        int ivdw, int ivdwmod,
+                        int ielec, int ielecmod,
+                        gmx_bool bfree, int igeometry)
 {
     t_nblist *nl;
     int      homenr;
     int      i,nn;
     
-    int inloop[20] =
-    { 
-        eNR_NBKERNEL_NONE,
-        eNR_NBKERNEL010,
-        eNR_NBKERNEL020,
-        eNR_NBKERNEL030,
-        eNR_NBKERNEL100,
-        eNR_NBKERNEL110,
-        eNR_NBKERNEL120,
-        eNR_NBKERNEL130,
-        eNR_NBKERNEL200,
-        eNR_NBKERNEL210,
-        eNR_NBKERNEL220,
-        eNR_NBKERNEL230,
-        eNR_NBKERNEL300,
-        eNR_NBKERNEL310,
-        eNR_NBKERNEL320,
-        eNR_NBKERNEL330,
-        eNR_NBKERNEL400,
-        eNR_NBKERNEL410,
-        eNR_NBKERNEL_NONE,
-        eNR_NBKERNEL430
-    };
-  
     for(i=0; (i<2); i++)
     {
         nl     = (i == 0) ? nl_sr : nl_lr;
@@ -170,47 +127,24 @@ static void init_nblist(t_nblist *nl_sr,t_nblist *nl_lr,
         {
             continue;
         }
-        
+
+
         /* Set coul/vdw in neighborlist, and for the normal loops we determine
          * an index of which one to call.
          */
-        nl->ivdw  = ivdw;
-        nl->icoul = icoul;
+        nl->ivdw        = ivdw;
+        nl->ivdwmod     = ivdwmod;
+        nl->ielec       = ielec;
+        nl->ielecmod    = ielecmod;
         nl->free_energy = bfree;
-    
+        nl->igeometry   = igeometry;
+
         if (bfree)
         {
-            nl->enlist  = enlistATOM_ATOM;
-            nl->il_code = eNR_NBKERNEL_FREE_ENERGY;
+            nl->igeometry  = GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE;
         }
-        else
-        {
-            nl->enlist = enlist;
-
-            nn = inloop[4*icoul + ivdw];
-            
-            /* solvent loops follow directly after the corresponding
-            * ordinary loops, in the order:
-            *
-            * SPC, SPC-SPC, TIP4p, TIP4p-TIP4p
-            *   
-            */
-            switch (enlist) {
-            case enlistATOM_ATOM:
-            case enlistCG_CG:
-                break;
-            case enlistSPC_ATOM:     nn += 1; break;
-            case enlistSPC_SPC:      nn += 2; break;
-            case enlistTIP4P_ATOM:   nn += 3; break;
-            case enlistTIP4P_TIP4P:  nn += 4; break;
-            }
-            
-            nl->il_code = nn;
-        }
-
-        if (debug)
-            fprintf(debug,"Initiating neighbourlist type %d for %s interactions,\nwith %d SR, %d LR atoms.\n",
-                    nl->il_code,ENLISTTYPE(enlist),maxsr,maxlr);
+        
+        gmx_nonbonded_set_kernel_pointers( (i==0) ? log : NULL,nl);
         
         /* maxnri is influenced by the number of shifts (maximum is 8)
          * and the number of energy groups.
@@ -229,6 +163,13 @@ static void init_nblist(t_nblist *nl_sr,t_nblist *nl_lr,
         nl->jindex      = NULL;
         reallocate_nblist(nl);
         nl->jindex[0] = 0;
+
+        if(debug)
+        {
+            fprintf(debug,"Initiating neighbourlist (ielec=%d, ivdw=%d, free=%d) for %s interactions,\nwith %d SR, %d LR atoms.\n",
+                    nl->ielec,nl->ivdw,nl->free_energy,gmx_nblist_geometry_names[nl->igeometry],maxsr,maxlr);
+        }
+
 #ifdef GMX_THREAD_SHM_FDECOMP
         nl->counter = 0;
         snew(nl->mtx,1);
@@ -245,9 +186,9 @@ void init_neighbor_list(FILE *log,t_forcerec *fr,int homenr)
     * cache trashing.
     */
    int maxsr,maxsr_wat,maxlr,maxlr_wat;
-   int icoul,icoulf,ivdw;
+   int ielec,ielecf,ivdw,ielecmod,ielecmodf,ivdwmod;
    int solvent;
-   int enlist_def,enlist_w,enlist_ww;
+   int igeometry_def,igeometry_w,igeometry_ww;
    int i;
    t_nblists *nbl;
 
@@ -274,46 +215,20 @@ void init_neighbor_list(FILE *log,t_forcerec *fr,int homenr)
      maxlr = maxlr_wat = 0;
    }  
 
-   /* Determine the values for icoul/ivdw. */
-   /* Start with GB */
-   if(fr->bGB)
-   {
-       icoul=enbcoulGB;
-   }
-   else if (fr->bcoultab)
-   {
-       icoul = enbcoulTAB;
-   }
-   else if (EEL_RF(fr->eeltype))
-   {
-       icoul = enbcoulRF;
-   }
-   else 
-   {
-       icoul = enbcoulOOR;
-   }
-   
-   if (fr->bvdwtab)
-   {
-       ivdw = enbvdwTAB;
-   }
-   else if (fr->bBHAM)
-   {
-       ivdw = enbvdwBHAM;
-   }
-   else 
-   {
-       ivdw = enbvdwLJ;
-   }
+   /* Determine the values for ielec/ivdw. */
+   ielec = fr->nbkernel_elec_interaction;
+   ivdw  = fr->nbkernel_vdw_interaction;
+   ielecmod = fr->nbkernel_elec_modifier;
+   ivdwmod  = fr->nbkernel_vdw_modifier;
 
    fr->ns.bCGlist = (getenv("GMX_NBLISTCG") != 0);
    if (!fr->ns.bCGlist)
    {
-       enlist_def = enlistATOM_ATOM;
+       igeometry_def = GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE;
    }
    else
    {
-       enlist_def = enlistCG_CG;
+       igeometry_def = GMX_NBLIST_GEOMETRY_CG_CG;
        if (log != NULL)
        {
            fprintf(log,"\nUsing charge-group - charge-group neighbor lists and kernels\n\n");
@@ -321,55 +236,75 @@ void init_neighbor_list(FILE *log,t_forcerec *fr,int homenr)
    }
    
    if (fr->solvent_opt == esolTIP4P) {
-       enlist_w  = enlistTIP4P_ATOM;
-       enlist_ww = enlistTIP4P_TIP4P;
+       igeometry_w  = GMX_NBLIST_GEOMETRY_WATER4_PARTICLE;
+       igeometry_ww = GMX_NBLIST_GEOMETRY_WATER4_WATER4;
    } else {
-       enlist_w  = enlistSPC_ATOM;
-       enlist_ww = enlistSPC_SPC;
+       igeometry_w  = GMX_NBLIST_GEOMETRY_WATER3_PARTICLE;
+       igeometry_ww = GMX_NBLIST_GEOMETRY_WATER3_WATER3;
    }
 
    for(i=0; i<fr->nnblists; i++) 
    {
        nbl = &(fr->nblists[i]);
-       init_nblist(&nbl->nlist_sr[eNL_VDWQQ],&nbl->nlist_lr[eNL_VDWQQ],
-                   maxsr,maxlr,ivdw,icoul,FALSE,enlist_def);
-       init_nblist(&nbl->nlist_sr[eNL_VDW],&nbl->nlist_lr[eNL_VDW],
-                   maxsr,maxlr,ivdw,0,FALSE,enlist_def);
-       init_nblist(&nbl->nlist_sr[eNL_QQ],&nbl->nlist_lr[eNL_QQ],
-                   maxsr,maxlr,0,icoul,FALSE,enlist_def);
-       init_nblist(&nbl->nlist_sr[eNL_VDWQQ_WATER],&nbl->nlist_lr[eNL_VDWQQ_WATER],
-                   maxsr_wat,maxlr_wat,ivdw,icoul, FALSE,enlist_w);
-       init_nblist(&nbl->nlist_sr[eNL_QQ_WATER],&nbl->nlist_lr[eNL_QQ_WATER],
-                   maxsr_wat,maxlr_wat,0,icoul, FALSE,enlist_w);
-       init_nblist(&nbl->nlist_sr[eNL_VDWQQ_WATERWATER],&nbl->nlist_lr[eNL_VDWQQ_WATERWATER],
-                   maxsr_wat,maxlr_wat,ivdw,icoul, FALSE,enlist_ww);
-       init_nblist(&nbl->nlist_sr[eNL_QQ_WATERWATER],&nbl->nlist_lr[eNL_QQ_WATERWATER],
-                   maxsr_wat,maxlr_wat,0,icoul, FALSE,enlist_ww);
+       init_nblist(log,&nbl->nlist_sr[eNL_VDWQQ],&nbl->nlist_lr[eNL_VDWQQ],
+                   maxsr,maxlr,ivdw,ivdwmod,ielec,ielecmod,FALSE,igeometry_def);
+       init_nblist(log,&nbl->nlist_sr[eNL_VDW],&nbl->nlist_lr[eNL_VDW],
+                   maxsr,maxlr,ivdw,ivdwmod,GMX_NBKERNEL_ELEC_NONE,eintmodNONE,FALSE,igeometry_def);
+       init_nblist(log,&nbl->nlist_sr[eNL_QQ],&nbl->nlist_lr[eNL_QQ],
+                   maxsr,maxlr,GMX_NBKERNEL_VDW_NONE,eintmodNONE,ielec,ielecmod,FALSE,igeometry_def);
+       init_nblist(log,&nbl->nlist_sr[eNL_VDWQQ_WATER],&nbl->nlist_lr[eNL_VDWQQ_WATER],
+                   maxsr_wat,maxlr_wat,ivdw,ivdwmod,ielec,ielecmod, FALSE,igeometry_w);
+       init_nblist(log,&nbl->nlist_sr[eNL_QQ_WATER],&nbl->nlist_lr[eNL_QQ_WATER],
+                   maxsr_wat,maxlr_wat,GMX_NBKERNEL_VDW_NONE,eintmodNONE,ielec,ielecmod, FALSE,igeometry_w);
+       init_nblist(log,&nbl->nlist_sr[eNL_VDWQQ_WATERWATER],&nbl->nlist_lr[eNL_VDWQQ_WATERWATER],
+                   maxsr_wat,maxlr_wat,ivdw,ivdwmod,ielec,ielecmod, FALSE,igeometry_ww);
+       init_nblist(log,&nbl->nlist_sr[eNL_QQ_WATERWATER],&nbl->nlist_lr[eNL_QQ_WATERWATER],
+                   maxsr_wat,maxlr_wat,GMX_NBKERNEL_VDW_NONE,eintmodNONE,ielec,ielecmod, FALSE,igeometry_ww);
+
+       /* Did we get the solvent loops so we can use optimized water kernels? */
+       if(nbl->nlist_sr[eNL_VDWQQ_WATER].kernelptr_vf==NULL
+          || nbl->nlist_sr[eNL_QQ_WATER].kernelptr_vf==NULL
+#ifndef DISABLE_WATERWATER_NLIST
+          || nbl->nlist_sr[eNL_VDWQQ_WATERWATER].kernelptr_vf==NULL
+          || nbl->nlist_sr[eNL_QQ_WATERWATER].kernelptr_vf==NULL
+#endif
+          )
+       {
+           fr->solvent_opt = esolNO;
+           fprintf(log,"Note: The available nonbonded kernels do not support water optimization - disabling.\n");
+       }
        
        if (fr->efep != efepNO) 
        {
            if ((fr->bEwald) && (fr->sc_alphacoul > 0)) /* need to handle long range differently if using softcore */
            {
-               icoulf = enbcoulFEWALD;
+               ielecf = GMX_NBKERNEL_ELEC_EWALD;
+               ielecmodf = eintmodNONE;
            }
            else
            {
-               icoulf = icoul;
+               ielecf = ielec;
+               ielecmodf = ielecmod;
            }
 
-           init_nblist(&nbl->nlist_sr[eNL_VDWQQ_FREE],&nbl->nlist_lr[eNL_VDWQQ_FREE],
-                       maxsr,maxlr,ivdw,icoulf,TRUE,enlistATOM_ATOM);
-           init_nblist(&nbl->nlist_sr[eNL_VDW_FREE],&nbl->nlist_lr[eNL_VDW_FREE],
-                       maxsr,maxlr,ivdw,0,TRUE,enlistATOM_ATOM);
-           init_nblist(&nbl->nlist_sr[eNL_QQ_FREE],&nbl->nlist_lr[eNL_QQ_FREE],
-                       maxsr,maxlr,0,icoulf,TRUE,enlistATOM_ATOM);
+           init_nblist(log,&nbl->nlist_sr[eNL_VDWQQ_FREE],&nbl->nlist_lr[eNL_VDWQQ_FREE],
+                       maxsr,maxlr,ivdw,ivdwmod,ielecf,ielecmod,TRUE,GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE);
+           init_nblist(log,&nbl->nlist_sr[eNL_VDW_FREE],&nbl->nlist_lr[eNL_VDW_FREE],
+                       maxsr,maxlr,ivdw,ivdwmod,GMX_NBKERNEL_ELEC_NONE,eintmodNONE,TRUE,GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE);
+           init_nblist(log,&nbl->nlist_sr[eNL_QQ_FREE],&nbl->nlist_lr[eNL_QQ_FREE],
+                       maxsr,maxlr,GMX_NBKERNEL_VDW_NONE,eintmodNONE,ielecf,ielecmod,TRUE,GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE);
        }  
    }
    /* QMMM MM list */
    if (fr->bQMMM && fr->qr->QMMMscheme != eQMMMschemeoniom)
    {
-       init_nblist(&fr->QMMMlist,NULL,
-                   maxsr,maxlr,0,icoul,FALSE,enlistATOM_ATOM);
+       init_nblist(log,&fr->QMMMlist,NULL,
+                   maxsr,maxlr,0,0,ielec,ielecmod,FALSE,GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE);
+   }
+
+   if(log!=NULL)
+   {
+       fprintf(log,"\n");
    }
 
    fr->ns.nblist_initialized=TRUE;
@@ -386,27 +321,28 @@ static void reset_nblist(t_nblist *nl)
      }
 }
 
-static void reset_neighbor_list(t_forcerec *fr,gmx_bool bLR,int nls,int eNL)
+static void reset_neighbor_lists(t_forcerec *fr,gmx_bool bResetSR, gmx_bool bResetLR)
 {
     int n,i;
   
-    if (bLR) 
+    if (fr->bQMMM)
     {
-        reset_nblist(&(fr->nblists[nls].nlist_lr[eNL]));
+        /* only reset the short-range nblist */
+        reset_nblist(&(fr->QMMMlist));
     }
-    else 
+
+    for(n=0; n<fr->nnblists; n++)
     {
-        for(n=0; n<fr->nnblists; n++)
+        for(i=0; i<eNL_NR; i++)
         {
-            for(i=0; i<eNL_NR; i++)
+            if(bResetSR)
             {
-                reset_nblist(&(fr->nblists[n].nlist_sr[i]));
+                reset_nblist( &(fr->nblists[n].nlist_sr[i]) );
+            }
+            if(bResetLR)
+            {
+                reset_nblist( &(fr->nblists[n].nlist_lr[i]) );
             }
-        }
-        if (fr->bQMMM)
-        { 
-            /* only reset the short-range nblist */
-            reset_nblist(&(fr->QMMMlist));
         }
     }
 }
@@ -494,36 +430,26 @@ static inline void close_nblist(t_nblist *nlist)
     }
 }
 
-static inline void close_neighbor_list(t_forcerec *fr,gmx_bool bLR,int nls,int eNL, 
-                                       gmx_bool bMakeQMMMnblist)
+static inline void close_neighbor_lists(t_forcerec *fr,gmx_bool bMakeQMMMnblist)
 {
     int n,i;
     
-    if (bMakeQMMMnblist) {
-        if (!bLR)
-        {
+    if (bMakeQMMMnblist)
+    {
             close_nblist(&(fr->QMMMlist));
-        }
     }
-    else 
+
+    for(n=0; n<fr->nnblists; n++)
     {
-        if (bLR)
+        for(i=0; (i<eNL_NR); i++)
         {
-            close_nblist(&(fr->nblists[nls].nlist_lr[eNL]));
-        }
-        else
-        { 
-            for(n=0; n<fr->nnblists; n++)
-            {
-                for(i=0; (i<eNL_NR); i++)
-                {
-                    close_nblist(&(fr->nblists[n].nlist_sr[i]));
-                }
-            }
+            close_nblist(&(fr->nblists[n].nlist_sr[i]));
+            close_nblist(&(fr->nblists[n].nlist_lr[i]));
         }
     }
 }
 
+
 static inline void add_j_to_nblist(t_nblist *nlist,atom_id j_atom,gmx_bool bLR)
 {
     int nrj=nlist->nrj;
@@ -532,8 +458,8 @@ static inline void add_j_to_nblist(t_nblist *nlist,atom_id j_atom,gmx_bool bLR)
     {
         nlist->maxnrj = over_alloc_small(nlist->nrj + 1);
         if (gmx_debug_at)
-            fprintf(debug,"Increasing %s nblist %s j size to %d\n",
-                    bLR ? "LR" : "SR",nrnb_str(nlist->il_code),nlist->maxnrj);
+            fprintf(debug,"Increasing %s nblist (ielec=%d,ivdw=%d,free=%d,igeometry=%d) j size to %d\n",
+                    bLR ? "LR" : "SR",nlist->ielec,nlist->ivdw,nlist->free_energy,nlist->igeometry,nlist->maxnrj);
         
         srenew(nlist->jjnr,nlist->maxnrj);
     }
@@ -554,8 +480,8 @@ static inline void add_j_to_nblist_cg(t_nblist *nlist,
     {
         nlist->maxnrj = over_alloc_small(nlist->nrj + 1);
         if (gmx_debug_at)
-            fprintf(debug,"Increasing %s nblist %s j size to %d\n",
-                    bLR ? "LR" : "SR",nrnb_str(nlist->il_code),nlist->maxnrj);
+            fprintf(debug,"Increasing %s nblist (ielec=%d,ivdw=%d,free=%d,igeometry=%d) j size to %d\n",
+                    bLR ? "LR" : "SR",nlist->ielec,nlist->ivdw,nlist->free_energy,nlist->igeometry,nlist->maxnrj);
         
         srenew(nlist->jjnr    ,nlist->maxnrj);
         srenew(nlist->jjnr_end,nlist->maxnrj);
@@ -599,9 +525,10 @@ put_in_list_t(gmx_bool              bHaveVdW[],
               t_excl            bExcl[],
               int               shift,
               t_forcerec *      fr,
-              gmx_bool              bLR,
-              gmx_bool              bDoVdW,
-              gmx_bool              bDoCoul);
+              gmx_bool          bLR,
+              gmx_bool          bDoVdW,
+              gmx_bool          bDoCoul,
+              int               solvent_opt);
 
 static void 
 put_in_list_at(gmx_bool              bHaveVdW[],
@@ -615,9 +542,10 @@ put_in_list_at(gmx_bool              bHaveVdW[],
                t_excl            bExcl[],
                int               shift,
                t_forcerec *      fr,
-               gmx_bool              bLR,
-               gmx_bool              bDoVdW,
-               gmx_bool              bDoCoul)
+               gmx_bool          bLR,
+               gmx_bool          bDoVdW,
+               gmx_bool          bDoCoul,
+               int               solvent_opt)
 {
     /* The a[] index has been removed,
      * to put it back in i_atom should be a[i0] and jj should be a[jj].
@@ -658,7 +586,8 @@ put_in_list_at(gmx_bool              bHaveVdW[],
     
     /* Get the i charge group info */
     igid   = GET_CGINFO_GID(cginfo[icg]);
-    iwater = GET_CGINFO_SOLOPT(cginfo[icg]);
+
+    iwater = (solvent_opt!=esolNO) ? GET_CGINFO_SOLOPT(cginfo[icg]) : esolNO;
     
     bFreeEnergy = FALSE;
     if (md->nPerturbed) 
@@ -1152,9 +1081,10 @@ put_in_list_qmmm(gmx_bool              bHaveVdW[],
                  t_excl            bExcl[],
                  int               shift,
                  t_forcerec *      fr,
-                 gmx_bool              bLR,
-                 gmx_bool              bDoVdW,
-                 gmx_bool              bDoCoul)
+                 gmx_bool          bLR,
+                 gmx_bool          bDoVdW,
+                 gmx_bool          bDoCoul,
+                 int               solvent_opt)
 {
     t_nblist *   coul;
     int          i,j,jcg,igid,gid;
@@ -1214,9 +1144,10 @@ put_in_list_cg(gmx_bool              bHaveVdW[],
                t_excl            bExcl[],
                int               shift,
                t_forcerec *      fr,
-               gmx_bool              bLR,
-               gmx_bool              bDoVdW,
-               gmx_bool              bDoCoul)
+               gmx_bool          bLR,
+               gmx_bool          bDoVdW,
+               gmx_bool          bDoCoul,
+               int               solvent_opt)
 {
     int          cginfo;
     int          igid,gid,nbl_ind;
@@ -1444,7 +1375,7 @@ static void add_simple(t_ns_buf *nsbuf,int nrj,atom_id cg_j,
     if (nsbuf->nj + nrj > MAX_CG)
     {
         put_in_list(bHaveVdW,ngid,md,icg,jgid,nsbuf->ncg,nsbuf->jcg,
-                    cgs->index,bexcl,shift,fr,FALSE,TRUE,TRUE);
+                    cgs->index,bexcl,shift,fr,FALSE,TRUE,TRUE,fr->solvent_opt);
         /* Reset buffer contents */
         nsbuf->ncg = nsbuf->nj = 0;
     }
@@ -1518,7 +1449,7 @@ static void ns_inner_rect(rvec x[],int icg,int *i_egp_flags,
                 }
             }
         }
-    } 
+    }
     else
     {
         for(j=0; (j<njcg); j++)
@@ -1616,7 +1547,7 @@ static int ns_simple_core(t_forcerec *fr,
                 if (nsbuf->ncg > 0)
                 {
                     put_in_list(bHaveVdW,ngid,md,icg,nn,nsbuf->ncg,nsbuf->jcg,
-                                cgs->index,bexcl,k,fr,FALSE,TRUE,TRUE);
+                                cgs->index,bexcl,k,fr,FALSE,TRUE,TRUE,fr->solvent_opt);
                     nsbuf->ncg=nsbuf->nj=0;
                 }
             }
@@ -1624,7 +1555,7 @@ static int ns_simple_core(t_forcerec *fr,
         /* setexcl(nri,i_atoms,excl,FALSE,bexcl); */
         setexcl(cgs->index[icg],cgs->index[icg+1],excl,FALSE,bexcl);
     }
-    close_neighbor_list(fr,FALSE,-1,-1,FALSE);
+    close_neighbor_lists(fr,FALSE);
     
     return nsearch;
 }
@@ -1785,56 +1716,13 @@ static inline void get_dx_dd(int Nx,real gridx,real rc2,int xgi,real x,
  *
  ****************************************************/
 
-static void do_longrange(t_commrec *cr,gmx_localtop_t *top,t_forcerec *fr,
-                         int ngid,t_mdatoms *md,int icg,
-                         int jgid,int nlr,
-                         atom_id lr[],t_excl bexcl[],int shift,
-                         rvec x[],rvec box_size,t_nrnb *nrnb,
-                         real *lambda,real *dvdlambda,
-                         gmx_grppairener_t *grppener,
-                         gmx_bool bDoVdW,gmx_bool bDoCoul,
-                         gmx_bool bEvaluateNow,put_in_list_t *put_in_list,
-                         gmx_bool bHaveVdW[],
-                         gmx_bool bDoForces,rvec *f)
-{
-    int n,i;
-    t_nblist *nl;
-    
-    for(n=0; n<fr->nnblists; n++)
-    {
-        for(i=0; (i<eNL_NR); i++)
-        {
-            nl = &fr->nblists[n].nlist_lr[i];
-            if ((nl->nri > nl->maxnri-32) || bEvaluateNow)
-            {
-                close_neighbor_list(fr,TRUE,n,i,FALSE);
-                /* Evaluate the energies and forces */
-                do_nonbonded(cr,fr,x,f,md,NULL,
-                             grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR],
-                             grppener->ener[egCOULLR],
-                                                        grppener->ener[egGB],box_size,
-                             nrnb,lambda,dvdlambda,n,i,
-                             GMX_DONB_LR | GMX_DONB_FORCES);
-                
-                reset_neighbor_list(fr,TRUE,n,i);
-            }
-        }
-    }
-    
-    if (!bEvaluateNow)
-    {  
-        /* Put the long range particles in a list */
-        /* do_longrange is never called for QMMM  */
-        put_in_list(bHaveVdW,ngid,md,icg,jgid,nlr,lr,top->cgs.index,
-                    bexcl,shift,fr,TRUE,bDoVdW,bDoCoul);
-    }
-}
 
 static void get_cutoff2(t_forcerec *fr,gmx_bool bDoLongRange,
                         real *rvdw2,real *rcoul2,
                         real *rs2,real *rm2,real *rl2)
 {
     *rs2 = sqr(fr->rlist);
+
     if (bDoLongRange && fr->bTwinRange)
     {
         /* The VdW and elec. LR cut-off's could be different,
@@ -1883,26 +1771,16 @@ static void init_nsgrid_lists(t_forcerec *fr,int ngid,gmx_ns_t *ns)
     snew(ns->nlr_ljc,ngid);
     snew(ns->nlr_one,ngid);
     
-    if (rm2 > rs2)
-    {
-            /* Long range VdW and Coul buffers */
-        snew(ns->nl_lr_ljc,ngid);
-    }
-    if (rl2 > rm2)
-    {
-        /* Long range VdW or Coul only buffers */
-        snew(ns->nl_lr_one,ngid);
-    }
+    /* Always allocate both list types, since rcoulomb might now change with PME load balancing */
+    /* Long range VdW and Coul buffers */
+    snew(ns->nl_lr_ljc,ngid);
+    /* Long range VdW or Coul only buffers */
+    snew(ns->nl_lr_one,ngid);
+
     for(j=0; (j<ngid); j++) {
         snew(ns->nl_sr[j],MAX_CG);
-        if (rm2 > rs2)
-        {
-            snew(ns->nl_lr_ljc[j],MAX_CG);
-        }
-        if (rl2 > rm2)
-        {
-            snew(ns->nl_lr_one[j],MAX_CG);
-        }
+        snew(ns->nl_lr_ljc[j],MAX_CG);
+        snew(ns->nl_lr_one[j],MAX_CG);
     }
     if (debug)
     {
@@ -1922,8 +1800,7 @@ static int nsgrid_core(FILE *log,t_commrec *cr,t_forcerec *fr,
                        gmx_grppairener_t *grppener,
                        put_in_list_t *put_in_list,
                        gmx_bool bHaveVdW[],
-                       gmx_bool bDoLongRange,gmx_bool bDoForces,rvec *f,
-                       gmx_bool bMakeQMMMnblist)
+                       gmx_bool bDoLongRange,gmx_bool bMakeQMMMnblist)
 {
     gmx_ns_t *ns;
     atom_id **nl_lr_ljc,**nl_lr_one,**nl_sr;
@@ -2273,10 +2150,11 @@ static int nsgrid_core(FILE *log,t_commrec *cr,t_forcerec *fr,
                                                         {
                                                             if (nsr[jgid] >= MAX_CG)
                                                             {
+                                                                /* Add to short-range list */
                                                                 put_in_list(bHaveVdW,ngid,md,icg,jgid,
                                                                             nsr[jgid],nl_sr[jgid],
                                                                             cgs->index,/* cgsatoms, */ bexcl,
-                                                                            shift,fr,FALSE,TRUE,TRUE);
+                                                                            shift,fr,FALSE,TRUE,TRUE,fr->solvent_opt);
                                                                 nsr[jgid]=0;
                                                             }
                                                             nl_sr[jgid][nsr[jgid]++]=jjcg;
@@ -2285,33 +2163,22 @@ static int nsgrid_core(FILE *log,t_commrec *cr,t_forcerec *fr,
                                                         {
                                                             if (nlr_ljc[jgid] >= MAX_CG)
                                                             {
-                                                                do_longrange(cr,top,fr,ngid,md,icg,jgid,
-                                                                             nlr_ljc[jgid],
-                                                                             nl_lr_ljc[jgid],bexcl,shift,x,
-                                                                             box_size,nrnb,
-                                                                             lambda,dvdlambda,
-                                                                             grppener,
-                                                                             TRUE,TRUE,FALSE,
-                                                                             put_in_list,
-                                                                             bHaveVdW,
-                                                                             bDoForces,f);
+                                                                /* Add to LJ+coulomb long-range list */
+                                                                put_in_list(bHaveVdW,ngid,md,icg,jgid,
+                                                                            nlr_ljc[jgid],nl_lr_ljc[jgid],top->cgs.index,
+                                                                            bexcl,shift,fr,TRUE,TRUE,TRUE,fr->solvent_opt);
                                                                 nlr_ljc[jgid]=0;
                                                             }
                                                             nl_lr_ljc[jgid][nlr_ljc[jgid]++]=jjcg;
                                                         }
                                                         else
                                                         {
-                                                            if (nlr_one[jgid] >= MAX_CG) {
-                                                                do_longrange(cr,top,fr,ngid,md,icg,jgid,
-                                                                             nlr_one[jgid],
-                                                                             nl_lr_one[jgid],bexcl,shift,x,
-                                                                             box_size,nrnb,
-                                                                             lambda,dvdlambda,
-                                                                             grppener,
-                                                                             rvdw_lt_rcoul,rcoul_lt_rvdw,FALSE,
-                                                                             put_in_list,
-                                                                             bHaveVdW,
-                                                                             bDoForces,f);
+                                                            if (nlr_one[jgid] >= MAX_CG)
+                                                            {
+                                                                /* Add to long-range list with only coul, or only LJ */
+                                                                put_in_list(bHaveVdW,ngid,md,icg,jgid,
+                                                                            nlr_one[jgid],nl_lr_one[jgid],top->cgs.index,
+                                                                            bexcl,shift,fr,TRUE,rvdw_lt_rcoul,rcoul_lt_rvdw,fr->solvent_opt);
                                                                 nlr_one[jgid]=0;
                                                             }
                                                             nl_lr_one[jgid][nlr_one[jgid]++]=jjcg;
@@ -2333,24 +2200,21 @@ static int nsgrid_core(FILE *log,t_commrec *cr,t_forcerec *fr,
                         {
                             put_in_list(bHaveVdW,ngid,md,icg,nn,nsr[nn],nl_sr[nn],
                                         cgs->index, /* cgsatoms, */ bexcl,
-                                        shift,fr,FALSE,TRUE,TRUE);
+                                        shift,fr,FALSE,TRUE,TRUE,fr->solvent_opt);
                         }
                         
                         if (nlr_ljc[nn] > 0)
                         {
-                            do_longrange(cr,top,fr,ngid,md,icg,nn,nlr_ljc[nn],
-                                         nl_lr_ljc[nn],bexcl,shift,x,box_size,nrnb,
-                                         lambda,dvdlambda,grppener,TRUE,TRUE,FALSE,
-                                         put_in_list,bHaveVdW,bDoForces,f);
+                            put_in_list(bHaveVdW,ngid,md,icg,nn,nlr_ljc[nn],
+                                        nl_lr_ljc[nn],top->cgs.index,
+                                        bexcl,shift,fr,TRUE,TRUE,TRUE,fr->solvent_opt);
                         }
                         
                         if (nlr_one[nn] > 0)
                         {
-                            do_longrange(cr,top,fr,ngid,md,icg,nn,nlr_one[nn],
-                                         nl_lr_one[nn],bexcl,shift,x,box_size,nrnb,
-                                         lambda,dvdlambda,grppener,
-                                         rvdw_lt_rcoul,rcoul_lt_rvdw,FALSE,
-                                         put_in_list,bHaveVdW,bDoForces,f);
+                            put_in_list(bHaveVdW,ngid,md,icg,nn,nlr_one[nn],
+                                        nl_lr_one[nn],top->cgs.index,
+                                        bexcl,shift,fr,TRUE,rvdw_lt_rcoul,rcoul_lt_rvdw,fr->solvent_opt);
                         }
                     }
                 }
@@ -2359,28 +2223,14 @@ static int nsgrid_core(FILE *log,t_commrec *cr,t_forcerec *fr,
         /* setexcl(nri,i_atoms,&top->atoms.excl,FALSE,bexcl); */
         setexcl(cgs->index[icg],cgs->index[icg+1],&top->excls,FALSE,bexcl);
     }
-    /* Perform any left over force calculations */
-    for (nn=0; (nn<ngid); nn++)
-    {
-        if (rm2 > rs2)
-        {
-            do_longrange(cr,top,fr,0,md,icg,nn,nlr_ljc[nn],
-                         nl_lr_ljc[nn],bexcl,shift,x,box_size,nrnb,
-                         lambda,dvdlambda,grppener,
-                         TRUE,TRUE,TRUE,put_in_list,bHaveVdW,bDoForces,f);
-        }
-        if (rl2 > rm2) {
-            do_longrange(cr,top,fr,0,md,icg,nn,nlr_one[nn],
-                         nl_lr_one[nn],bexcl,shift,x,box_size,nrnb,
-                         lambda,dvdlambda,grppener,
-                         rvdw_lt_rcoul,rcoul_lt_rvdw,
-                         TRUE,put_in_list,bHaveVdW,bDoForces,f);
-        }
-    }
+    /* No need to perform any left-over force calculations anymore (as we used to do here)
+     * since we now save the proper long-range lists for later evaluation.
+     */
+
     debug_gmx();
-    
-    /* Close off short range neighbourlists */
-    close_neighbor_list(fr,FALSE,-1,-1,bMakeQMMMnblist);
+     
+    /* Close neighbourlists */
+    close_neighbor_lists(fr,bMakeQMMMnblist);
     
     return nns;
 }
@@ -2522,8 +2372,7 @@ int search_neighbours(FILE *log,t_forcerec *fr,
                       real *lambda,real *dvdlambda,
                       gmx_grppairener_t *grppener,
                       gmx_bool bFillGrid,
-                      gmx_bool bDoLongRange,
-                      gmx_bool bDoForces,rvec *f)
+                      gmx_bool bDoLongRangeNS)
 {
     t_block  *cgs=&(top->cgs);
     rvec     box_size,grid_x0,grid_x1;
@@ -2538,7 +2387,7 @@ int search_neighbours(FILE *log,t_forcerec *fr,
     t_grid   *grid;
     gmx_domdec_zones_t *dd_zones;
     put_in_list_t *put_in_list;
-       
+
     ns = &fr->ns;
 
     /* Set some local variables */
@@ -2571,7 +2420,7 @@ int search_neighbours(FILE *log,t_forcerec *fr,
     debug_gmx();
     
     /* Reset the neighbourlists */
-    reset_neighbor_list(fr,FALSE,-1,-1);
+    reset_neighbor_lists(fr,TRUE,TRUE);
     
     if (bGrid && bFillGrid)
     {
@@ -2657,8 +2506,7 @@ int search_neighbours(FILE *log,t_forcerec *fr,
                               grid,x,ns->bexcl,ns->bExcludeAlleg,
                               nrnb,md,lambda,dvdlambda,grppener,
                               put_in_list,ns->bHaveVdW,
-                              bDoLongRange,bDoForces,f,
-                              FALSE);
+                              bDoLongRangeNS,FALSE);
         
         /* neighbour searching withouth QMMM! QM atoms have zero charge in
          * the classical calculation. The charge-charge interaction
@@ -2675,8 +2523,7 @@ int search_neighbours(FILE *log,t_forcerec *fr,
                                    grid,x,ns->bexcl,ns->bExcludeAlleg,
                                    nrnb,md,lambda,dvdlambda,grppener,
                                    put_in_list_qmmm,ns->bHaveVdW,
-                                   bDoLongRange,bDoForces,f,
-                                   TRUE);
+                                   bDoLongRangeNS,TRUE);
         }
     }
     else 
@@ -2686,7 +2533,7 @@ int search_neighbours(FILE *log,t_forcerec *fr,
                                  ngid,ns->ns_buf,put_in_list,ns->bHaveVdW);
     }
     debug_gmx();
-    
+
 #ifdef DEBUG
     pr_nsblock(log);
 #endif
index 0a0c9030f2481f1e6e13033b7c9db0acb28cd045..9232f50e0334f3f5c60bd7b43f3a12bdea8bd414 100644 (file)
@@ -568,13 +568,14 @@ void init_QMMMrec(t_commrec *cr,
       /* we now store the LJ C6 and C12 parameters in QM rec in case
        * we need to do an optimization 
        */
-      if(qr->qm[j]->bOPT || qr->qm[j]->bTS){
-       for(i=0;i<qm_nr;i++){
-         qr->qm[j]->c6[i]  =  C6(fr->nbfp,mtop->ffparams.atnr,
-                                 atom->type,atom->type)/c6au;
-         qr->qm[j]->c12[i] = C12(fr->nbfp,mtop->ffparams.atnr,
-                                 atom->type,atom->type)/c12au;
-       }
+      if(qr->qm[j]->bOPT || qr->qm[j]->bTS)
+      {
+          for(i=0;i<qm_nr;i++)
+          {
+              /* nbfp now includes the 6.0/12.0 derivative prefactors */
+              qr->qm[j]->c6[i]  =  C6(fr->nbfp,mtop->ffparams.atnr,atom->type,atom->type)/c6au/6.0;
+              qr->qm[j]->c12[i] = C12(fr->nbfp,mtop->ffparams.atnr,atom->type,atom->type)/c12au/12.0;
+          }
       }
       /* now we check for frontier QM atoms. These occur in pairs that
        * construct the vsite
@@ -629,18 +630,16 @@ void init_QMMMrec(t_commrec *cr,
     /* store QM atoms in the QMrec and initialise
      */
     init_QMrec(0,qr->qm[0],qm_nr,qm_arr,mtop,ir);
-    if(qr->qm[0]->bOPT || qr->qm[0]->bTS){
-      for(i=0;i<qm_nr;i++){
-       gmx_mtop_atomnr_to_atom(alook,qm_arr[i],&atom);
-       qr->qm[0]->c6[i]  =  C6(fr->nbfp,mtop->ffparams.atnr,
-                               atom->type,atom->type)/c6au;
-       qr->qm[0]->c12[i] = C12(fr->nbfp,mtop->ffparams.atnr,
-                               atom->type,atom->type)/c12au;
-      }
-      
+    if(qr->qm[0]->bOPT || qr->qm[0]->bTS)
+    {
+        for(i=0;i<qm_nr;i++)
+        {
+            gmx_mtop_atomnr_to_atom(alook,qm_arr[i],&atom);
+            /* nbfp now includes the 6.0/12.0 derivative prefactors */
+            qr->qm[0]->c6[i]  =  C6(fr->nbfp,mtop->ffparams.atnr,atom->type,atom->type)/c6au/6.0;
+            qr->qm[0]->c12[i] = C12(fr->nbfp,mtop->ffparams.atnr,atom->type,atom->type)/c12au/12.0;
+        }
     }
-    
-
 
     /* find frontier atoms and mark them true in the frontieratoms array.
      */
@@ -957,13 +956,11 @@ void update_QMMMrec(t_commrec *cr,
        */
       srenew(mm->c6,mm->nrMMatoms);
       srenew(mm->c12,mm->nrMMatoms);
-      for (i=0;i<mm->nrMMatoms;i++){
-       mm->c6[i]  = C6(fr->nbfp,top->idef.atnr,
-                       md->typeA[mm->indexMM[i]],
-                       md->typeA[mm->indexMM[i]])/c6au;
-       mm->c12[i] =C12(fr->nbfp,top->idef.atnr,
-                       md->typeA[mm->indexMM[i]],
-                       md->typeA[mm->indexMM[i]])/c12au;
+      for (i=0;i<mm->nrMMatoms;i++)
+      {
+          /* nbfp now includes the 6.0/12.0 derivative prefactors */
+          mm->c6[i]  = C6(fr->nbfp,top->idef.atnr,md->typeA[mm->indexMM[i]],md->typeA[mm->indexMM[i]])/c6au/6.0;
+          mm->c12[i] =C12(fr->nbfp,top->idef.atnr,md->typeA[mm->indexMM[i]],md->typeA[mm->indexMM[i]])/c12au/12.0;
       }
       punch_QMMM_excl(qr->qm[0],mm,&(top->excls));
     }
index 889d1d48580b536b8124d8fee91036dfa29679aa..e1c0e5ccb25af9e36e8532c7211b486afe6acbca 100644 (file)
@@ -760,7 +760,7 @@ void do_force_cutsVERLET(FILE *fplog,t_commrec *cr,
     bNS           = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE); 
     bFillGrid     = (bNS && bStateChanged);
     bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
-    bDoLongRange  = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DOLR));
+    bDoLongRange  = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DO_LR));
     bDoForces     = (flags & GMX_FORCE_FORCES);
     bSepLRF       = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF));
     bUseGPU       = fr->nbv->bUseGPU;
@@ -1110,20 +1110,13 @@ void do_force_cutsVERLET(FILE *fplog,t_commrec *cr,
             }
         }
 
-        if (bSepLRF)
+        /* Clear the short- and long-range forces */
+        clear_rvecs(fr->natoms_force_constr,f);
+        if(bSepLRF && do_per_step(step,inputrec->nstcalclr))
         {
-            /* Add the long range forces to the short range forces */
-            for(i=0; i<fr->natoms_force_constr; i++)
-            {
-                copy_rvec(fr->f_twin[i],f[i]);
-            }
+            clear_rvecs(fr->natoms_force_constr,fr->f_twin);
         }
-        else if (!(fr->bTwinRange && bNS))
-        {
-            /* Clear the short-range forces */
-            clear_rvecs(fr->natoms_force_constr,f);
-        }
-
+        
         clear_rvec(fr->vir_diag_posres);
 
         GMX_BARRIER(cr->mpi_comm_mygroup);
@@ -1149,13 +1142,25 @@ void do_force_cutsVERLET(FILE *fplog,t_commrec *cr,
     /* if we use the GPU turn off the nonbonded */
     do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
                       cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
-                      x,hist,f,enerd,fcd,mtop,top,fr->born,
+                      x,hist,f, bSepLRF ? fr->f_twin : f,enerd,fcd,mtop,top,fr->born,
                       &(top->atomtypes),bBornRadii,box,
                       inputrec->fepvals,lambda,graph,&(top->excls),fr->mu_tot,
                       ((nb_kernel_type == nbk8x8x8_CUDA || nb_kernel_type == nbk8x8x8_PlainC) 
                         ? flags&~GMX_FORCE_NONBONDED : flags),
                       &cycles_pme);
 
+    if(bSepLRF)
+    {
+        if (do_per_step(step,inputrec->nstcalclr))
+        {
+            /* Add the long range forces to the short range forces */
+            for(i=0; i<fr->natoms_force_constr; i++)
+            {
+                rvec_add(fr->f_twin[i],f[i],f[i]);
+            }
+        }
+    }
+    
     if (!bUseOrEmulGPU)
     {
         /* Maybe we should move this into do_force_lowlevel */
@@ -1410,7 +1415,7 @@ void do_force_cutsGROUP(FILE *fplog,t_commrec *cr,
     int    start,homenr;
     double mu[2*DIM];
     gmx_bool   bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS;
-    gmx_bool   bDoLongRange,bDoForces,bSepLRF;
+    gmx_bool   bDoLongRangeNS,bDoForces,bDoPotential,bSepLRF;
     gmx_bool   bDoAdressWF;
     matrix boxs;
     rvec   vzero,box_diag;
@@ -1446,13 +1451,18 @@ void do_force_cutsGROUP(FILE *fplog,t_commrec *cr,
         }
     }
 
-    bStateChanged = (flags & GMX_FORCE_STATECHANGED);
-    bNS           = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE);
-    bFillGrid     = (bNS && bStateChanged);
-    bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
-    bDoLongRange  = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DOLR));
-    bDoForces     = (flags & GMX_FORCE_FORCES);
-    bSepLRF       = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF));
+    bStateChanged  = (flags & GMX_FORCE_STATECHANGED);
+    bNS            = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE);
+    /* Should we update the long-range neighborlists at this step? */
+    bDoLongRangeNS = fr->bTwinRange && bNS;
+    /* Should we perform the long-range nonbonded evaluation inside the neighborsearching? */
+    bFillGrid      = (bNS && bStateChanged);
+    bCalcCGCM      = (bFillGrid && !DOMAINDECOMP(cr));
+    bDoForces      = (flags & GMX_FORCE_FORCES);
+    bDoPotential   = (flags & GMX_FORCE_ENERGY);
+    bSepLRF        = ((inputrec->nstcalclr>1) && bDoForces &&
+                      (flags & GMX_FORCE_SEPLRF) && (flags & GMX_FORCE_DO_LR));
+
     /* should probably move this to the forcerec since it doesn't change */
     bDoAdressWF   = ((fr->adress_type!=eAdressOff));
 
@@ -1613,13 +1623,6 @@ void do_force_cutsGROUP(FILE *fplog,t_commrec *cr,
             mk_mshift(fplog,graph,fr->ePBC,box,x);
         }
 
-        /* Reset long range forces if necessary */
-        if (fr->bTwinRange)
-        {
-            /* Reset the (long-range) forces if necessary */
-            clear_rvecs(fr->natoms_force_constr,bSepLRF ? fr->f_twin : f);
-        }
-
         /* Do the actual neighbour searching and if twin range electrostatics
          * also do the calculation of long range forces and energies.
          */
@@ -1627,7 +1630,7 @@ void do_force_cutsGROUP(FILE *fplog,t_commrec *cr,
         ns(fplog,fr,x,box,
            groups,&(inputrec->opts),top,mdatoms,
            cr,nrnb,lambda,dvdlambda,&enerd->grpp,bFillGrid,
-           bDoLongRange,bDoForces,bSepLRF ? fr->f_twin : f);
+           bDoLongRangeNS);
         if (bSepDVDL)
         {
             fprintf(fplog,sepdvdlformat,"LR non-bonded",0.0,dvdlambda);
@@ -1700,20 +1703,13 @@ void do_force_cutsGROUP(FILE *fplog,t_commrec *cr,
             }
         }
 
-        if (bSepLRF)
-        {
-            /* Add the long range forces to the short range forces */
-            for(i=0; i<fr->natoms_force_constr; i++)
-            {
-                copy_rvec(fr->f_twin[i],f[i]);
-            }
-        }
-        else if (!(fr->bTwinRange && bNS))
+        /* Clear the short- and long-range forces */
+        clear_rvecs(fr->natoms_force_constr,f);
+        if(bSepLRF && do_per_step(step,inputrec->nstcalclr))
         {
-            /* Clear the short-range forces */
-            clear_rvecs(fr->natoms_force_constr,f);
+            clear_rvecs(fr->natoms_force_constr,fr->f_twin);
         }
-
+        
         clear_rvec(fr->vir_diag_posres);
 
         GMX_BARRIER(cr->mpi_comm_mygroup);
@@ -1738,13 +1734,25 @@ void do_force_cutsGROUP(FILE *fplog,t_commrec *cr,
     /* Compute the bonded and non-bonded energies and optionally forces */
     do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
                       cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
-                      x,hist,f,enerd,fcd,mtop,top,fr->born,
+                      x,hist,f, bSepLRF ? fr->f_twin : f,enerd,fcd,mtop,top,fr->born,
                       &(top->atomtypes),bBornRadii,box,
                       inputrec->fepvals,lambda,
                       graph,&(top->excls),fr->mu_tot,
                       flags,
                       &cycles_pme);
 
+    if(bSepLRF)
+    {
+        if (do_per_step(step,inputrec->nstcalclr))
+        {
+            /* Add the long range forces to the short range forces */
+            for(i=0; i<fr->natoms_force_constr; i++)
+            {
+                rvec_add(fr->f_twin[i],f[i],f[i]);
+            }
+        }
+    }
+    
     cycles_force = wallcycle_stop(wcycle,ewcFORCE);
     GMX_BARRIER(cr->mpi_comm_mygroup);
 
@@ -2039,7 +2047,7 @@ void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
   double r0,r1,r,rc3,rc9,ea,eb,ec,pa,pb,pc,pd;
   double invscale,invscale2,invscale3;
   int    ri0,ri1,ri,i,offstart,offset;
-  real   scale,*vdwtab;
+  real   scale,*vdwtab,tabfactor,tmp;
 
   fr->enershiftsix = 0;
   fr->enershifttwelve = 0;
@@ -2059,8 +2067,8 @@ void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
                  "With dispersion correction rvdw-switch can not be zero "
                  "for vdw-type = %s",evdw_names[fr->vdwtype]);
 
-      scale  = fr->nblists[0].tab.scale;
-      vdwtab = fr->nblists[0].vdwtab;
+      scale  = fr->nblists[0].table_elec_vdw.scale;
+      vdwtab = fr->nblists[0].table_vdw.data;
 
       /* Round the cut-offs to exact table values for precision */
       ri0 = floor(fr->rvdw_switch*scale);
@@ -2070,10 +2078,14 @@ void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
       rc3 = r0*r0*r0;
       rc9  = rc3*rc3*rc3;
 
-      if (fr->vdwtype == evdwSHIFT) {
-       /* Determine the constant energy shift below rvdw_switch */
-       fr->enershiftsix    = (real)(-1.0/(rc3*rc3)) - vdwtab[8*ri0];
-       fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - vdwtab[8*ri0 + 4];
+      if (fr->vdwtype == evdwSHIFT)
+      {
+          /* Determine the constant energy shift below rvdw_switch.
+           * Table has a scale factor since we have scaled it down to compensate
+           * for scaling-up c6/c12 with the derivative factors to save flops in analytical kernels.
+           */
+          fr->enershiftsix    = (real)(-1.0/(rc3*rc3)) - 6.0*vdwtab[8*ri0];
+          fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - 12.0*vdwtab[8*ri0 + 4];
       }
       /* Add the constant part from 0 to rvdw_switch.
        * This integration from 0 to rvdw_switch overcounts the number
@@ -2100,9 +2112,19 @@ void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
       for (i=0;i<2;i++) {
         enersum = 0.0; virsum = 0.0;
         if (i==0)
-         offstart = 0;
-       else
-         offstart = 4;
+        {
+            offstart = 0;
+            /* Since the dispersion table has been scaled down a factor 6.0 and the repulsion
+             * a factor 12.0 to compensate for the c6/c12 parameters inside nbfp[] being scaled
+             * up (to save flops in kernels), we need to correct for this.
+             */
+            tabfactor = 6.0;
+        }
+        else
+        {
+            offstart = 4;
+            tabfactor = 12.0;
+        }
        for (ri=ri0; ri<ri1; ri++) {
           r = ri*invscale;
           ea = invscale3;
@@ -2114,22 +2136,19 @@ void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
           pc = 3.0*invscale*r*r;
           pd = r*r*r;
 
-          /* this "8" is from the packing in the vdwtab array - perhaps
-           should be #define'ed? */
+          /* this "8" is from the packing in the vdwtab array - perhaps should be #define'ed? */
           offset = 8*ri + offstart;
           y0 = vdwtab[offset];
-          f = vdwtab[offset+1];
-          g = vdwtab[offset+2];
-          h = vdwtab[offset+3];
-
-          enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2)+
-            g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);
-          virsum  +=  f*(pa/4 + pb/3 + pc/2 + pd) +
-            2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3);
+          f  = vdwtab[offset+1];
+          g  = vdwtab[offset+2];
+          h  = vdwtab[offset+3];
 
+          enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2) + g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);
+          virsum  += f*(pa/4 + pb/3 + pc/2 + pd) + 2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3);
         }
-        enersum *= 4.0*M_PI;
-        virsum  *= 4.0*M_PI;
+          
+        enersum *= 4.0*M_PI*tabfactor;
+        virsum  *= 4.0*M_PI*tabfactor;
         eners[i] -= enersum;
         virs[i]  -= virsum;
       }
@@ -2149,7 +2168,7 @@ void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
       /* Contribution beyond the cut-off */
       eners[0] += -4.0*M_PI/(3.0*rc3);
       eners[1] +=  4.0*M_PI/(9.0*rc9);
-      if (fr->vdw_pot_shift) {
+      if (fr->vdw_modifier==eintmodPOTSHIFT) {
           /* Contribution within the cut-off */
           eners[0] += -4.0*M_PI/(3.0*rc3);
           eners[1] +=  4.0*M_PI/(3.0*rc9);
index 0c26ea947a810e4fe2d1e475b7b1970c8fd45175..8b2228919c26dec9ac3cc314e661f0112e23d89f 100644 (file)
@@ -139,12 +139,14 @@ static double v_ewald_lr(double beta,double r)
     }
 }
 
-void table_spline3_fill_ewald_lr(real *tabf,real *tabv,
-                                 int ntab,int tableformat,
-                                 real dx,real beta)
+void table_spline3_fill_ewald_lr(real *table_f,
+                                 real *table_v,
+                                 real *table_fdv0,
+                                 int   ntab,
+                                 real  dx,
+                                 real  beta)
 {
     real tab_max;
-    int stride=0;
     int i,i_inrange;
     double dc,dc_new;
     gmx_bool bOutOfRange;
@@ -168,13 +170,6 @@ void table_spline3_fill_ewald_lr(real *tabf,real *tabv,
      * The rms force error is the max error times 1/sqrt(5)=0.45.
      */
 
-    switch (tableformat)
-    {
-    case tableformatF:    stride = 1; break;
-    case tableformatFDV0: stride = 4; break;
-    default: gmx_incons("Unknown table format");
-    }
-
     bOutOfRange = FALSE;
     i_inrange = ntab;
     v_inrange = 0;
@@ -198,20 +193,9 @@ void table_spline3_fill_ewald_lr(real *tabf,real *tabv,
             vi = v_inrange - dc*(i - i_inrange)*dx;
         }
 
-        switch (tableformat)
+        if(table_v!=NULL)
         {
-        case tableformatF:
-            if (tabv != NULL)
-            {
-                tabv[i] = vi;
-            }
-            break;
-        case tableformatFDV0:
-            tabf[i*stride+2] = vi;
-            tabf[i*stride+3] = 0;
-            break;
-        default:
-            gmx_incons("Unknown table format");
+            table_v[i] = vi;
         }
 
         if (i == 0)
@@ -243,12 +227,12 @@ void table_spline3_fill_ewald_lr(real *tabf,real *tabv,
         if (i == ntab - 1)
         {
             /* Fill the table with the force, minus the derivative of the spline */
-            tabf[i*stride] = -dc;
+            table_f[i] = -dc;
         }
         else
         {
             /* tab[i] will contain the average of the splines over the two intervals */
-            tabf[i*stride] += -0.5*dc;
+            table_f[i] += -0.5*dc;
         }
 
         if (!bOutOfRange)
@@ -274,19 +258,27 @@ void table_spline3_fill_ewald_lr(real *tabf,real *tabv,
             }
         }
 
-        tabf[(i-1)*stride] = -0.5*dc;
+        table_f[(i-1)] = -0.5*dc;
     }
     /* Currently the last value only contains half the force: double it */
-    tabf[0] *= 2;
+    table_f[0] *= 2;
 
-    if (tableformat == tableformatFDV0)
+    if(table_v!=NULL && table_fdv0!=NULL)
     {
-        /* Store the force difference in the second entry */
-        for(i=0; i<ntab-1; i++)
+        /* Copy to FDV0 table too. Allocation occurs in forcerec.c,
+         * init_ewald_f_table().
+         */
+        for(i=0;i<ntab-1;i++)
         {
-            tabf[i*stride+1] = tabf[(i+1)*stride] - tabf[i*stride];
+            table_fdv0[4*i]     = table_f[i];
+            table_fdv0[4*i+1]   = table_f[i+1]-table_f[i];
+            table_fdv0[4*i+2]   = table_v[i];
+            table_fdv0[4*i+3]   = 0.0;
         }
-        tabf[(ntab-1)*stride+1] = -tabf[i*stride];
+        table_fdv0[4*(ntab-1)]    = table_f[(ntab-1)];
+        table_fdv0[4*(ntab-1)+1]  = -table_f[(ntab-1)];
+        table_fdv0[4*(ntab-1)+2]  = table_v[(ntab-1)];
+        table_fdv0[4*(ntab-1)+3]  = 0.0;
     }
 }
 
@@ -344,7 +336,7 @@ static void evaluate_table(real VFtab[], int offset, int stride,
 }
 
 static void copy2table(int n,int offset,int stride,
-                      double x[],double Vtab[],double Ftab[],
+                      double x[],double Vtab[],double Ftab[],real scalefactor,
                       real dest[])
 {
 /* Use double prec. for the intermediary variables
@@ -370,10 +362,10 @@ static void copy2table(int n,int offset,int stride,
       H   = 0;
     }
     nn0 = offset + i*stride;
-    dest[nn0]   = Vtab[i];
-    dest[nn0+1] = F;
-    dest[nn0+2] = G;
-    dest[nn0+3] = H;
+    dest[nn0]   = scalefactor*Vtab[i];
+    dest[nn0+1] = scalefactor*F;
+    dest[nn0+2] = scalefactor*G;
+    dest[nn0+3] = scalefactor*H;
   }
 }
 
@@ -761,30 +753,31 @@ static void fill_table(t_tabledata *td,int tp,const t_forcerec *fr)
 
     switch (tp) {
     case etabLJ6:
-      /* Dispersion */
-      Vtab  = -r6;
-      Ftab  = 6.0*Vtab/r;
-      break;
+            /* Dispersion */
+            Vtab = -r6;
+            Ftab = 6.0*Vtab/r;
+            break;
     case etabLJ6Switch:
     case etabLJ6Shift:
       /* Dispersion */
       if (r < rc) {      
-       Vtab  = -r6;
-       Ftab  = 6.0*Vtab/r;
+          Vtab = -r6;
+          Ftab = 6.0*Vtab/r;
+          break;
       }
       break;
     case etabLJ12:
-      /* Repulsion */
-      Vtab  = r12;
-      Ftab  = reppow*Vtab/r;
+            /* Repulsion */
+            Vtab  = r12;
+            Ftab  = reppow*Vtab/r;
       break;
     case etabLJ12Switch:
     case etabLJ12Shift:
       /* Repulsion */
       if (r < rc) {                
-       Vtab  = r12;
-       Ftab  = reppow*Vtab/r;
-      }  
+          Vtab  = r12;
+          Ftab  = reppow*Vtab/r;
+      }
       break;
        case etabLJ6Encad:
         if(r < rc) {
@@ -797,9 +790,9 @@ static void fill_table(t_tabledata *td,int tp,const t_forcerec *fr)
         break;
     case etabLJ12Encad:
         if(r < rc) {
-            Vtab  = r12-12.0*(rc-r)*rc6*rc6/rc-1.0*rc6*rc6;
-            Ftab  = 12.0*r12/r-12.0*rc6*rc6/rc;
-        } else { /* r>rc */ 
+            Vtab  = -(r6-6.0*(rc-r)*rc6/rc-rc6);
+            Ftab  = -(6.0*r6/r-6.0*rc6/rc);
+        } else { /* r>rc */
             Vtab  = 0;
             Ftab  = 0;
         } 
@@ -875,8 +868,8 @@ static void fill_table(t_tabledata *td,int tp,const t_forcerec *fr)
     if ((r > r1) && bSwitch) {
       Ftab = Ftab*swi - Vtab*swi1;
       Vtab = Vtab*swi;
-    }  
-    
+    }
+
     /* Convert to single precision when we store to mem */
     td->v[i]  = Vtab;
     td->f[i]  = Ftab;
@@ -1019,7 +1012,8 @@ t_forcetable make_tables(FILE *out,const output_env_t oenv,
   gmx_bool        b14only,bReadTab,bGenTab;
   real        x0,y0,yp;
   int         i,j,k,nx,nx0,tabsel[etiNR];
-  
+  real        scalefactor;
+
   t_forcetable table;
 
   b14only = (flags & GMX_MAKETABLES_14ONLY);
@@ -1039,6 +1033,12 @@ t_forcetable make_tables(FILE *out,const output_env_t oenv,
   nx0             = 10;
   nx              = 0;
   
+  table.interaction   = GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP;
+  table.format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
+  table.formatsize    = 4;
+  table.ninteractions = 3;
+  table.stride        = table.formatsize*table.ninteractions;
+
   /* Check whether we have to read or generate */
   bReadTab = FALSE;
   bGenTab  = FALSE;
@@ -1084,7 +1084,7 @@ t_forcetable make_tables(FILE *out,const output_env_t oenv,
    * numbers per nx+1 data points. For performance reasons we want
    * the table data to be aligned to 16-byte.
    */
-  snew_aligned(table.tab, 12*(nx+1)*sizeof(real),16);
+  snew_aligned(table.data, 12*(nx+1)*sizeof(real),16);
 
   for(k=0; (k<etiNR); k++) {
     if (tabsel[k] != etabUSER) {
@@ -1099,7 +1099,27 @@ t_forcetable make_tables(FILE *out,const output_env_t oenv,
                td[k].nx,b14only?"1-4 ":"",tprops[tabsel[k]].name,
                td[k].tabscale);
     }
-    copy2table(table.n,k*4,12,td[k].x,td[k].v,td[k].f,table.tab);
+
+    /* Set scalefactor for c6/c12 tables. This is because we save flops in the non-table kernels
+     * by including the derivative constants (6.0 or 12.0) in the parameters, since
+     * we no longer calculate force in most steps. This means the c6/c12 parameters
+     * have been scaled up, so we need to scale down the table interactions too.
+     * It comes here since we need to scale user tables too.
+     */
+      if(k==etiLJ6)
+      {
+          scalefactor = 1.0/6.0;
+      }
+      else if(k==etiLJ12 && tabsel[k]!=etabEXPMIN)
+      {
+          scalefactor = 1.0/12.0;
+      }
+      else
+      {
+          scalefactor = 1.0;
+      }
+
+    copy2table(table.n,k*4,12,td[k].x,td[k].v,td[k].f,scalefactor,table.data);
     
     if (bDebugMode() && bVerbose) {
       if (b14only)
@@ -1109,7 +1129,7 @@ t_forcetable make_tables(FILE *out,const output_env_t oenv,
       /* plot the output 5 times denser than the table data */
       for(i=5*((nx0+1)/2); i<5*table.n; i++) {
        x0 = i*table.r/(5*(table.n-1));
-       evaluate_table(table.tab,4*k,12,table.scale,x0,&y0,&yp);
+       evaluate_table(table.data,4*k,12,table.scale,x0,&y0,&yp);
        fprintf(fp,"%15.10e  %15.10e  %15.10e\n",x0,y0,yp);
       }
       gmx_fio_fclose(fp);
@@ -1154,12 +1174,17 @@ t_forcetable make_gb_table(FILE *out,const output_env_t oenv,
         * use etiNR (since we only have one table, but ...) 
         */
        snew(td,1);
-       table.r         = fr->gbtabr;
-       table.scale     = fr->gbtabscale;
-       table.scale_exp = 0;
-       table.n         = table.scale*table.r;
-       nx0             = 0;
-       nx              = table.scale*table.r;
+    table.interaction   = GMX_TABLE_INTERACTION_ELEC;
+    table.format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
+       table.r             = fr->gbtabr;
+       table.scale         = fr->gbtabscale;
+       table.scale_exp     = 0;
+       table.n             = table.scale*table.r;
+    table.formatsize    = 4;
+    table.ninteractions = 1;
+    table.stride        = table.formatsize*table.ninteractions;
+       nx0                 = 0;
+       nx                  = table.scale*table.r;
        
        /* Check whether we have to read or generate 
         * We will always generate a table, so remove the read code
@@ -1177,7 +1202,7 @@ t_forcetable make_gb_table(FILE *out,const output_env_t oenv,
         * to do this :-)
         */
        
-       snew_aligned(table.tab,4*nx,16);
+       snew_aligned(table.data,4*nx,16);
        
        init_table(out,nx,nx0,table.scale,&(td[0]),!bReadTab);
        
@@ -1206,7 +1231,7 @@ t_forcetable make_gb_table(FILE *out,const output_env_t oenv,
                
     }
        
-       copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,table.tab);
+       copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,1.0,table.data);
        
        if(bDebugMode())
     {
@@ -1217,7 +1242,7 @@ t_forcetable make_gb_table(FILE *out,const output_env_t oenv,
                {
                        /* x0=i*table.r/(5*table.n); */
                        x0=i*table.r/table.n;
-                       evaluate_table(table.tab,0,4,table.scale,x0,&y0,&yp);
+                       evaluate_table(table.data,0,4,table.scale,x0,&y0,&yp);
                        fprintf(fp,"%15.10e  %15.10e  %15.10e\n",x0,y0,yp);
                        
                }
@@ -1235,7 +1260,7 @@ t_forcetable make_gb_table(FILE *out,const output_env_t oenv,
         Ftab = (r-0.25*r*expterm)/((r2+expterm)*sqrt(r2+expterm));
         
         
-        evaluate_table(table.tab,0,4,table.scale,r,&y0,&yp);
+        evaluate_table(table.data,0,4,table.scale,r,&y0,&yp);
         printf("gb: i=%d, x0=%g, y0=%15.15f, Vtab=%15.15f, yp=%15.15f, Ftab=%15.15f\n",i,r, y0, Vtab, yp, Ftab);
         
         abs_error_r=fabs(y0-Vtab);
@@ -1337,9 +1362,9 @@ t_forcetable make_atf_table(FILE *out,const output_env_t oenv,
         * to do this :-)
         */
        
-    snew_aligned(table.tab,4*nx,16);
-       
-       copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,table.tab);
+    snew_aligned(table.data,4*nx,16);
+
+       copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,1.0,table.data);
        
        if(bDebugMode())
          {
@@ -1351,7 +1376,7 @@ t_forcetable make_atf_table(FILE *out,const output_env_t oenv,
              {
                /* x0=i*table.r/(5*table.n); */
                x0 = i*table.r/(5*(table.n-1));
-               evaluate_table(table.tab,0,4,table.scale,x0,&y0,&yp);
+               evaluate_table(table.data,0,4,table.scale,x0,&y0,&yp);
                fprintf(fp,"%15.10e  %15.10e  %15.10e\n",x0,y0,yp);
                
              }
@@ -1360,6 +1385,13 @@ t_forcetable make_atf_table(FILE *out,const output_env_t oenv,
 
        done_tabledata(&(td[0]));
        sfree(td);
+
+    table.interaction   = GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP;
+    table.format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
+    table.formatsize    = 4;
+    table.ninteractions = 3;
+    table.stride        = table.formatsize*table.ninteractions;
+
        
        return table;
 }
@@ -1386,8 +1418,8 @@ bondedtable_t make_bonded_table(FILE *fplog,char *fn,int angle)
   }
   tab.n = td.nx;
   tab.scale = td.tabscale;
-  snew(tab.tab,tab.n*4);
-  copy2table(tab.n,0,4,td.x,td.v,td.f,tab.tab);
+  snew(tab.data,tab.n*4);
+  copy2table(tab.n,0,4,td.x,td.v,td.f,1.0,tab.data);
   done_tabledata(&td);
 
   return tab;
index a5a2a71893bba4bf96061f4ee1e420b4b73888d2..9a08e4ba35ab727daf0fa8aaea5e457f5d8075a3 100644 (file)
@@ -568,7 +568,7 @@ double do_tpi(FILE *fplog,t_commrec *cr,
                          state->lambda,
                          NULL,fr,NULL,mu_tot,t,NULL,NULL,FALSE,
                          GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY |
-                         (bNS ? GMX_FORCE_DYNAMICBOX | GMX_FORCE_NS | GMX_FORCE_DOLR : 0) |
+                         (bNS ? GMX_FORCE_DYNAMICBOX | GMX_FORCE_NS | GMX_FORCE_DO_LR : 0) |
                          (bStateChanged ? GMX_FORCE_STATECHANGED : 0)); 
                 cr->nnodes = nnodes;
                 bStateChanged = FALSE;
index 430bec3c523d0fdb609fb47737030daa506fac5b..932fc34f225b47acba1c4b643a6cbb580bda7676 100644 (file)
@@ -1205,7 +1205,7 @@ static void deform(gmx_update_t upd,
     }
 }
 
-static void combine_forces(int nstlist,
+static void combine_forces(int nstcalclr,
                            gmx_constr_t constr,
                            t_inputrec *ir,t_mdatoms *md,t_idef *idef,
                            t_commrec *cr,
@@ -1235,10 +1235,10 @@ static void combine_forces(int nstlist,
                   NULL,NULL,nrnb,econqForce,ir->epc==epcMTTK,state->veta,state->veta);
     }
 
-    /* Add nstlist-1 times the LR force to the sum of both forces
+    /* Add nstcalclr-1 times the LR force to the sum of both forces
      * and store the result in forces_lr.
      */
-    nm1 = nstlist - 1;
+    nm1 = nstcalclr - 1;
     for(i=start; i<nrend; i++)
     {
         for(d=0; d<DIM; d++)
@@ -1709,7 +1709,7 @@ void update_coords(FILE         *fplog,
     tensor           vir_con;
     rvec             *vcom,*xcom,*vall,*xall,*xin,*vin,*forcein,*fall,*xpall,*xprimein,*xprime;
     int              nth,th;
-
+    
     /* Running the velocity half does nothing except for velocity verlet */
     if ((UpdatePart == etrtVELOCITY1 || UpdatePart == etrtVELOCITY2) &&
         !EI_VV(inputrec->eI))
@@ -1740,14 +1740,14 @@ void update_coords(FILE         *fplog,
     bNH = inputrec->etc == etcNOSEHOOVER;
     bPR = ((inputrec->epc == epcPARRINELLORAHMAN) || (inputrec->epc == epcMTTK));
 
-    if (bDoLR && inputrec->nstlist > 1 && !EI_VV(inputrec->eI))  /* get this working with VV? */
+    if (bDoLR && inputrec->nstcalclr > 1 && !EI_VV(inputrec->eI))  /* get this working with VV? */
     {
-        /* Store the total force + nstlist-1 times the LR force
+        /* Store the total force + nstcalclr-1 times the LR force
          * in forces_lr, so it can be used in a normal update algorithm
          * to produce twin time stepping.
          */
         /* is this correct in the new construction? MRS */
-        combine_forces(inputrec->nstlist,constr,inputrec,md,idef,cr,
+        combine_forces(inputrec->nstcalclr,constr,inputrec,md,idef,cr,
                        step,state,bMolPBC,
                        start,nrend,f,f_lr,nrnb);
         force = f_lr;
index d1be14c8d246aaf77ddedf91a3cb78d2a5c83c4a..dd69de4b7806a72bc79503e6f679272708639ef2 100644 (file)
@@ -83,7 +83,7 @@ void make_wall_tables(FILE *fplog,const output_env_t oenv,
        /* Since wall have no charge, we can compress the table */
        for(i=0; i<=tab->n; i++)
          for(j=0; j<8; j++)
-           tab->tab[8*i+j] = tab->tab[12*i+4+j];
+           tab->data[8*i+j] = tab->data[12*i+4+j];
       }
     }
   }
@@ -167,8 +167,9 @@ real do_walls(t_inputrec *ir,t_forcerec *fr,matrix box,t_mdatoms *md,
                 /* The wall energy groups are always at the end of the list */
                 ggid = gid[i]*ngid + ngid - nwall + w;
                 at = type[i];
-                Cd = nbfp[ntw[w]+2*at];
-                Cr = nbfp[ntw[w]+2*at+1];
+                /* nbfp now includes the 6.0/12.0 derivative prefactors */
+                Cd = nbfp[ntw[w]+2*at]/6.0;
+                Cr = nbfp[ntw[w]+2*at+1]/12.0;
                 if (!((Cd==0 && Cr==0) || (egp_flags[ggid] & EGP_EXCL)))
                 {
                     if (w == 0)
@@ -197,7 +198,7 @@ real do_walls(t_inputrec *ir,t_forcerec *fr,matrix box,t_mdatoms *md,
                         }
                         tab = &(fr->wall_tab[w][gid[i]]);
                         tabscale = tab->scale;
-                        VFtab    = tab->tab;
+                        VFtab    = tab->data;
                         
                         rt    = r*tabscale;
                         n0    = rt;
index 16a6b4c16eba2de3f3e355f408cf0efc0eec184f..a1a4932d25f136349c2f33f856d975d41c29de6e 100644 (file)
@@ -60,9 +60,9 @@ static void write_nblist(FILE *out,gmx_domdec_t *dd,t_nblist *nblist,int nDNL)
   gmx_domdec_zones_t *dd_zones;
 
   if (nblist->nri > 0) {  
-    fprintf(out,"il_name: %s  Solvent opt: %s\n",
-            nrnb_str(nblist->il_code),
-            enlist_names[nblist->enlist]);
+    fprintf(out,"ielec: %d, ivdw: %d, free_energy: %d, Solvent opt: %s\n",
+            nblist->ielec,nblist->ivdw,nblist->free_energy,
+            gmx_nblist_geometry_names[nblist->igeometry]);
     fprintf(out,"nri: %d  npair: %d\n",nblist->nri,nblist->nrj);
     if (dd) {
       dd_zones = domdec_zones(dd);
@@ -94,7 +94,7 @@ static void write_nblist(FILE *out,gmx_domdec_t *dd,t_nblist *nblist,int nDNL)
     if (nDNL >= 2) {
       for(i=0; i<nblist->nri; i++) {
        nii = 1;
-       if (nDNL >= 3 && nblist->enlist != enlistATOM_ATOM)
+       if (nDNL >= 3 && nblist->igeometry != GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE)
          nii = 3;
        nj = nblist->jindex[i+1] - nblist->jindex[i];
        fprintf(out,"i: %d shift: %d gid: %d nj: %d\n",
@@ -130,86 +130,7 @@ static void set_mat(FILE *fp,int **mat,int i0,int ni,int j0,int nj,
   }
 }
 
-int read_nblist(FILE *in,FILE *fp,int **mat,int natoms,gmx_bool bSymm)
-{
-    gmx_bool bNL;
-    char buf[256],b1[32],b2[32],solv[256],il_code[256];
-    int  i,ii,j,nnbl,full,icmp,nri,isolv;
-    int  iatom,nrj,nj,shift,gid,nargs,njtot=0;
-    
-    do {
-        if (fgets2(buf,255,in) == NULL)
-            gmx_fatal(FARGS,"EOF when looking for '%s' in logfile",header);
-    } while (strstr(buf,header) == NULL);
-    
-    do {
-      do {
-        if (fgets2(buf,255,in) == NULL)
-         return njtot;
-      } while (strstr(buf,"nri:") == NULL);
-      
-      if (0) {
-       if ((nargs = sscanf(buf,"%*s%s%*s%s",il_code,solv)) != 2) {
-         fprintf(stderr,"Can not find the right il_code\n");
-         return njtot;
-       }
-        for(isolv=0; (isolv<esolNR); isolv++)
-         if (strstr(esol_names[isolv],solv) != NULL)
-           break;
-       
-        if (isolv == esolNR) {
-         fprintf(stderr,"Can not read il_code or solv (nargs=%d)\n",nargs);
-         return njtot;
-        }
-      }
-      else
-       isolv = enlistATOM_ATOM;
-      
-      /* gmx_fatal(FARGS,"Can not read il_code or solv (nargs=%d)",nargs);*/
-      if ((nargs = sscanf(buf,"%*s%d%*s%d",&nri,&nrj)) != 2)
-       gmx_fatal(FARGS,"Can not read nri or nrj (nargs=%d)",nargs);
-      for(ii=0; (ii<nri); ii++) {
-       if ((nargs = fscanf(in,"%*s%d%*s%d%*s%d%*s%d",
-                           &iatom,&shift,&gid,&nj)) != 4)
-         gmx_fatal(FARGS,"Can not read iatom, shift gid or nj (nargs=%d)",nargs);
-       /* Number shifts from 1 to 27 iso 0 to 26 to distinguish uninitialized 
-        * matrix elements.
-        */
-       range_check(iatom,0,natoms);
-       for(i=0; (i<nj); i++) {
-         if ((nargs = fscanf(in,"%*s%d",&j)) != 1)
-           gmx_fatal(FARGS,"Can not read j");
-         range_check(j,0,natoms);
-         switch (isolv) {
-         case enlistATOM_ATOM:
-           set_mat(fp,mat,iatom,1,j,1,bSymm,shift);
-           njtot++;
-           break;
-         case enlistSPC_ATOM:
-           set_mat(fp,mat,iatom,3,j,1,bSymm,shift);
-           njtot+=3;
-           break;
-         case enlistSPC_SPC:
-           set_mat(fp,mat,iatom,3,j,3,bSymm,shift);
-           njtot+=9;
-           break;
-         case enlistTIP4P_ATOM:
-           set_mat(fp,mat,iatom,4,j,1,bSymm,shift);
-           njtot+=4;
-           break;
-         case enlistTIP4P_TIP4P:
-           set_mat(fp,mat,iatom,4,j,4,bSymm,shift);
-           njtot+=16;
-           break;
-         default:
-           gmx_incons("non-existing solvent type");
-         }
-       }
-      }
-      fprintf(fp,"nri = %d  nrj = %d\n",nri,nrj);
-    } while (TRUE);
-    return -1;
-}
+
 
 void dump_nblist(FILE *out,t_commrec *cr,t_forcerec *fr,int nDNL)
 {
index a474002228e533172b9b74c13570949b6ab8fe4b..0c4337e169d6ab4a5862897f5be7697d2bfd2048 100644 (file)
@@ -283,8 +283,7 @@ void do_nsgrid(FILE *fp,gmx_bool bVerbose,
   snew(dvdl,efptNR);
   init_neighbor_list(fp,fr,md->homenr);
   search_neighbours(fp,fr,x,box,top,
-                   &mtop->groups,cr,&nrnb,md,lambda,dvdl,NULL,
-                   TRUE,FALSE,FALSE,NULL);
+                   &mtop->groups,cr,&nrnb,md,lambda,dvdl,NULL,TRUE,FALSE);
 
   if (debug)
     dump_nblist(debug,cr,fr,0);
index ff0a5bec4a2ea478b5b8643d7ab4cb9507b42302..96e359fd9d32e07c584f2ee74475ee62e3d76d6f 100644 (file)
@@ -158,7 +158,7 @@ static void low_calc_pot(FILE *log,int nl_type,t_forcerec *fr,
   
   nlist = &fr->nblists[0].nlist_sr[nl_type];
   
-  c_tabpot(fr->nblists[0].tab.scale,fr->nblists[0].tab.tab,
+  c_tabpot(fr->nblists[0].table_elec_vdw.scale,fr->nblists[0].table_elec_vdw.data,
           nlist->nri,nlist->iinr,
           nlist->shift,nlist->jindex,nlist->jjnr,
           x[0],fr->epsfac,mdatoms->chargeA,pot,fr->shift_vec[0]);
@@ -191,7 +191,7 @@ void calc_pot(FILE *logf,t_commrec *cr,
    * also do the calculation of long range forces and energies.
    */
   ns(logf,fr,x,box,&mtop->groups,&(inputrec->opts),top,mdatoms,cr,
-     &nrnb,&lam[0],&dum[0],&enerd->grpp,TRUE,FALSE,FALSE,NULL);
+     &nrnb,&lam[0],&dum[0],&enerd->grpp,TRUE,FALSE);
   for(m=0; (m<DIM); m++)
     box_size[m] = box[m][m];
   for(i=0; (i<mdatoms->nr); i++)
index 73435a7941a88b68f99f74e636c2f4fcc874023e..871730bf4a7857bf25ef7794b654d7dccd34721b 100644 (file)
@@ -307,8 +307,8 @@ int gmx_h2order(int argc,char *argv[])
   
   if (bMicel)
     rd_index(opt2fn("-nm",NFILE,fnm), 1, &nmic, &micelle, &micname);
-
-  calc_h2order(ftp2fn(efTRX,NFILE,fnm), index, ngx, &slDipole, &slOrder, 
+    
+  calc_h2order(ftp2fn(efTRX,NFILE,fnm), index, ngx, &slDipole, &slOrder,
               &slWidth, &nslices, top, ePBC, axis, bMicel, micelle, nmic,
                oenv);